3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Dates qw(format_date);
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
28 # set the version for version checking
31 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
36 C4::Search - Functions for searching the Koha catalog.
40 See opac/opac-search.pl or catalogue/search.pl for example of usage
44 This module provides searching functions for Koha's bibliographic databases
62 # make all your functions, whether exported or not;
64 =head2 findseealso($dbh,$fields);
66 C<$dbh> is a link to the DB handler.
69 my $dbh =C4::Context->dbh;
71 C<$fields> is a reference to the fields array
73 This function modifies the @$fields array and adds related fields to search on.
75 FIXME: this function is probably deprecated in Koha 3
80 my ( $dbh, $fields ) = @_;
81 my $tagslib = GetMarcStructure(1);
82 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
83 my ($tag) = substr( @$fields[$i], 1, 3 );
84 my ($subfield) = substr( @$fields[$i], 4, 1 );
85 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
86 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
100 my $dbh = C4::Context->dbh;
101 my $result = TransformMarcToKoha( $dbh, $record, '' );
106 my ( $biblionumber, $title );
108 # search duplicate on ISBN, easy and fast..
109 # ... normalize first
110 if ( $result->{isbn} ) {
111 $result->{isbn} =~ s/\(.*$//;
112 $result->{isbn} =~ s/\s+$//;
113 $query = "isbn=$result->{isbn}";
116 $result->{title} =~ s /\\//g;
117 $result->{title} =~ s /\"//g;
118 $result->{title} =~ s /\(//g;
119 $result->{title} =~ s /\)//g;
121 # FIXME: instead of removing operators, could just do
122 # quotes around the value
123 $result->{title} =~ s/(and|or|not)//g;
124 $query = "ti,ext=$result->{title}";
125 $query .= " and itemtype=$result->{itemtype}"
126 if ( $result->{itemtype} );
127 if ( $result->{author} ) {
128 $result->{author} =~ s /\\//g;
129 $result->{author} =~ s /\"//g;
130 $result->{author} =~ s /\(//g;
131 $result->{author} =~ s /\)//g;
133 # remove valid operators
134 $result->{author} =~ s/(and|or|not)//g;
135 $query .= " and au,ext=$result->{author}";
139 # FIXME: add error handling
140 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
142 foreach my $possible_duplicate_record (@$searchresults) {
144 MARC::Record->new_from_usmarc($possible_duplicate_record);
145 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
147 # FIXME :: why 2 $biblionumber ?
149 push @results, $result->{'biblionumber'};
150 push @results, $result->{'title'};
158 ($error,$results) = SimpleSearch($query,@servers);
160 This function provides a simple search API on the bibliographic catalog
166 * $query can be a simple keyword or a complete CCL query
167 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
170 * $error is a empty unless an error is detected
171 * \@results is an array of records.
173 =item C<usage in the script:>
177 my ($error, $marcresults) = SimpleSearch($query);
179 if (defined $error) {
180 $template->param(query_error => $error);
181 warn "error: ".$error;
182 output_html_with_http_headers $input, $cookie, $template->output;
186 my $hits = scalar @$marcresults;
189 for(my $i=0;$i<$hits;$i++) {
191 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
192 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
194 #build the hash for the template.
195 $resultsloop{highlight} = ($i % 2)?(1):(0);
196 $resultsloop{title} = $biblio->{'title'};
197 $resultsloop{subtitle} = $biblio->{'subtitle'};
198 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
199 $resultsloop{author} = $biblio->{'author'};
200 $resultsloop{publishercode} = $biblio->{'publishercode'};
201 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
203 push @results, \%resultsloop;
206 $template->param(result=>\@results);
212 if ( C4::Context->preference('NoZebra') ) {
213 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
216 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
217 return ( undef, $search_result );
224 return ( "No query entered", undef ) unless $query;
226 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
227 @servers = ("biblioserver") unless @servers;
229 # Initialize & Search Zebra
230 for ( my $i = 0 ; $i < @servers ; $i++ ) {
232 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
235 ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
239 $zconns[$i]->errmsg() . " ("
240 . $zconns[$i]->errcode() . ") "
241 . $zconns[$i]->addinfo() . " "
242 . $zconns[$i]->diagset();
244 return ( $error, undef ) if $zconns[$i]->errcode();
248 # caught a ZOOM::Exception
252 . $@->addinfo() . " "
255 return ( $error, undef );
260 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
261 $ev = $zconns[ $i - 1 ]->last_event();
262 if ( $ev == ZOOM::Event::ZEND ) {
263 $hits = $tmpresults[ $i - 1 ]->size();
266 for ( my $j = 0 ; $j < $hits ; $j++ ) {
267 my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
268 push @results, $record;
272 return ( undef, \@results );
278 ( undef, $results_hashref, \@facets_loop ) = getRecords (
280 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
281 $results_per_page, $offset, $expanded_facet, $branches,
285 The all singing, all dancing, multi-server, asynchronous, scanning,
286 searching, record nabbing, facet-building
288 See verbse embedded documentation.
294 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
295 $results_per_page, $offset, $expanded_facet, $branches,
299 my @servers = @$servers_ref;
300 my @sort_by = @$sort_by_ref;
302 # Initialize variables for the ZOOM connection and results object
306 my $results_hashref = ();
308 # Initialize variables for the faceted results objects
309 my $facets_counter = ();
310 my $facets_info = ();
311 my $facets = getFacets();
314 ; # stores the ref to array of hashes for template facets loop
316 ### LOOP THROUGH THE SERVERS
317 for ( my $i = 0 ; $i < @servers ; $i++ ) {
318 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
320 # perform the search, create the results objects
321 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
323 if ( $servers[$i] =~ /biblioserver/ ) {
324 $query_to_use = $koha_query;
327 $query_to_use = $simple_query;
330 #$query_to_use = $simple_query if $scan;
331 warn $simple_query if ( $scan and $DEBUG );
333 # Check if we've got a query_type defined, if so, use it
337 if ( $query_type =~ /^ccl/ ) {
339 s/\:/\=/g; # change : to = last minute (FIXME)
342 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
345 elsif ( $query_type =~ /^cql/ ) {
348 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
350 elsif ( $query_type =~ /^pqf/ ) {
353 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
360 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
366 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
372 warn "WARNING: query problem with $query_to_use " . $@;
375 # Concatenate the sort_by limits and pass them to the results object
376 # Note: sort will override rank
378 foreach my $sort (@sort_by) {
379 if ( $sort eq "author_az" ) {
380 $sort_by .= "1=1003 <i ";
382 elsif ( $sort eq "author_za" ) {
383 $sort_by .= "1=1003 >i ";
385 elsif ( $sort eq "popularity_asc" ) {
386 $sort_by .= "1=9003 <i ";
388 elsif ( $sort eq "popularity_dsc" ) {
389 $sort_by .= "1=9003 >i ";
391 elsif ( $sort eq "call_number_asc" ) {
392 $sort_by .= "1=20 <i ";
394 elsif ( $sort eq "call_number_dsc" ) {
395 $sort_by .= "1=20 >i ";
397 elsif ( $sort eq "pubdate_asc" ) {
398 $sort_by .= "1=31 <i ";
400 elsif ( $sort eq "pubdate_dsc" ) {
401 $sort_by .= "1=31 >i ";
403 elsif ( $sort eq "acqdate_asc" ) {
404 $sort_by .= "1=32 <i ";
406 elsif ( $sort eq "acqdate_dsc" ) {
407 $sort_by .= "1=32 >i ";
409 elsif ( $sort eq "title_az" ) {
410 $sort_by .= "1=4 <i ";
412 elsif ( $sort eq "title_za" ) {
413 $sort_by .= "1=4 >i ";
417 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
418 warn "WARNING sort $sort_by failed";
421 } # finished looping through servers
423 # The big moment: asynchronously retrieve results from all servers
424 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
425 my $ev = $zconns[ $i - 1 ]->last_event();
426 if ( $ev == ZOOM::Event::ZEND ) {
427 next unless $results[ $i - 1 ];
428 my $size = $results[ $i - 1 ]->size();
432 # loop through the results
433 $results_hash->{'hits'} = $size;
435 if ( $offset + $results_per_page <= $size ) {
436 $times = $offset + $results_per_page;
441 for ( my $j = $offset ; $j < $times ; $j++ ) {
446 ## Check if it's an index scan
448 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
450 # here we create a minimal MARC record and hand it off to the
451 # template just like a normal result ... perhaps not ideal, but
453 my $tmprecord = MARC::Record->new();
454 $tmprecord->encoding('UTF-8');
458 # the minimal record in author/title (depending on MARC flavour)
459 if ( C4::Context->preference("marcflavour") eq
462 $tmptitle = MARC::Field->new(
470 MARC::Field->new( '245', ' ', ' ', a => $term, );
472 MARC::Field->new( '100', ' ', ' ', a => $occ, );
474 $tmprecord->append_fields($tmptitle);
475 $tmprecord->append_fields($tmpauthor);
476 $results_hash->{'RECORDS'}[$j] =
477 $tmprecord->as_usmarc();
482 $record = $results[ $i - 1 ]->record($j)->raw();
484 # warn "RECORD $j:".$record;
485 $results_hash->{'RECORDS'}[$j] = $record;
487 # Fill the facets while we're looping, but only for the biblioserver
488 $facet_record = MARC::Record->new_from_usmarc($record)
489 if $servers[ $i - 1 ] =~ /biblioserver/;
491 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
493 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
495 if ( $facets->[$k] ) {
497 for my $tag ( @{ $facets->[$k]->{'tags'} } )
500 $facet_record->field($tag);
502 for my $field (@fields) {
503 my @subfields = $field->subfields();
504 for my $subfield (@subfields) {
505 my ( $code, $data ) = @$subfield;
507 $facets->[$k]->{'subfield'} )
509 $facets_counter->{ $facets->[$k]
515 $facets_info->{ $facets->[$k]
516 ->{'link_value'} }->{'label_value'} =
517 $facets->[$k]->{'label_value'};
518 $facets_info->{ $facets->[$k]
519 ->{'link_value'} }->{'expanded'} =
520 $facets->[$k]->{'expanded'};
526 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
529 # warn "connection ", $i-1, ": $size hits";
530 # warn $results[$i-1]->record(0)->render() if $size > 0;
533 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
535 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
536 keys %$facets_counter )
539 my $number_of_facets;
540 my @this_facets_array;
543 $facets_counter->{$link_value}
544 ->{$b} <=> $facets_counter->{$link_value}->{$a}
545 } keys %{ $facets_counter->{$link_value} }
549 if ( ( $number_of_facets < 6 )
550 || ( $expanded_facet eq $link_value )
551 || ( $facets_info->{$link_value}->{'expanded'} ) )
554 # Sanitize the link value ), ( will cause errors with CCL,
555 my $facet_link_value = $one_facet;
556 $facet_link_value =~ s/(\(|\))/ /g;
558 # fix the length that will display in the label,
559 my $facet_label_value = $one_facet;
561 substr( $one_facet, 0, 20 ) . "..."
562 unless length($facet_label_value) <= 20;
564 # if it's a branch, label by the name, not the code,
565 if ( $link_value =~ /branch/ ) {
567 $branches->{$one_facet}->{'branchname'};
570 # but we're down with the whole label being in the link's title.
571 my $facet_title_value = $one_facet;
573 push @this_facets_array,
577 $facets_counter->{$link_value}
579 facet_label_value => $facet_label_value,
580 facet_title_value => $facet_title_value,
581 facet_link_value => $facet_link_value,
582 type_link_value => $link_value,
588 # handle expanded option
589 unless ( $facets_info->{$link_value}->{'expanded'} ) {
591 if ( ( $number_of_facets > 6 )
592 && ( $expanded_facet ne $link_value ) );
597 type_link_value => $link_value,
598 type_id => $link_value . "_id",
600 $facets_info->{$link_value}->{'label_value'},
601 facets => \@this_facets_array,
602 expandable => $expandable,
603 expand => $link_value,
610 return ( undef, $results_hashref, \@facets_loop );
614 sub _remove_stopwords {
615 my ( $operand, $index ) = @_;
616 my @stopwords_removed;
618 # phrase and exact-qualified indexes shouldn't have stopwords removed
619 if ( $index !~ m/phr|ext/ ) {
621 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
622 # we use IsAlpha unicode definition, to deal correctly with diacritics.
623 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
624 # is a stopword, we'd get "çon" and wouldn't find anything...
625 foreach ( keys %{ C4::Context->stopwords } ) {
626 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
628 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
630 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
631 $operand =~ s/^$_\P{IsAlpha}/ /gi;
632 $operand =~ s/\P{IsAlpha}$_$/ /gi;
633 push @stopwords_removed, $_;
637 return ( $operand, \@stopwords_removed );
641 sub _detect_truncation {
642 my ( $operand, $index ) = @_;
643 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
646 my @wordlist = split( /\s/, $operand );
647 foreach my $word (@wordlist) {
648 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
649 push @rightlefttruncated, $word;
651 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
652 push @lefttruncated, $word;
654 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
655 push @righttruncated, $word;
657 elsif ( index( $word, "*" ) < 0 ) {
658 push @nontruncated, $word;
661 push @regexpr, $word;
665 \@nontruncated, \@righttruncated, \@lefttruncated,
666 \@rightlefttruncated, \@regexpr
671 sub _build_stemmed_operand {
675 # FIXME: the locale should be set based on the user's language and/or search choice
676 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
678 # FIXME: these should be stored in the db so the librarian can modify the behavior
679 $stemmer->add_exceptions(
686 my @words = split( / /, $operand );
687 my $stems = $stemmer->stem(@words);
688 for my $stem (@$stems) {
689 $stemmed_operand .= "$stem";
690 $stemmed_operand .= "?"
691 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
692 $stemmed_operand .= " ";
694 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
695 return $stemmed_operand;
699 sub _build_weighted_query {
701 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
702 # pretty well but could work much better if we had a smarter query parser
703 my ( $operand, $stemmed_operand, $index ) = @_;
704 my $stemming = C4::Context->preference("QueryStemming") || 0;
705 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
706 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
708 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
710 # Keyword, or, no index specified
711 if ( ( $index eq 'kw' ) || ( !$index ) ) {
713 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
714 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
715 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
716 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
717 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
718 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
719 if $fuzzy_enabled; # add fuzzy, word list
720 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
721 if ( $stemming and $stemmed_operand )
722 ; # add stemming, right truncation
723 $weighted_query .= " or wrdl,r9=\"$operand\"";
725 # embedded sorting: 0 a-z; 1 z-a
726 # $weighted_query .= ") or (sort1,aut=1";
729 # Barcode searches should skip this process
730 elsif ( $index eq 'bc' ) {
731 $weighted_query .= "bc=\"$operand\"";
734 # Authority-number searches should skip this process
735 elsif ( $index eq 'an' ) {
736 $weighted_query .= "an=\"$operand\"";
739 # If the index already has more than one qualifier, wrap the operand
740 # in quotes and pass it back (assumption is that the user knows what they
741 # are doing and won't appreciate us mucking up their query
742 elsif ( $index =~ ',' ) {
743 $weighted_query .= " $index=\"$operand\"";
746 #TODO: build better cases based on specific search indexes
748 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
749 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
750 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
752 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
755 $weighted_query .= "))"; # close rank specification
756 return $weighted_query;
762 $simple_query, $query_cgi,
764 $limit_cgi, $limit_desc,
765 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
767 Build queries and limits in CCL, CGI, Human,
768 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
770 See verbose embedded documentation.
776 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
778 warn "---------" if $DEBUG;
779 warn "Enter buildQuery" if $DEBUG;
780 warn "---------" if $DEBUG;
783 my @operators = @$operators if $operators;
784 my @indexes = @$indexes if $indexes;
785 my @operands = @$operands if $operands;
786 my @limits = @$limits if $limits;
787 my @sort_by = @$sort_by if $sort_by;
789 my $stemming = C4::Context->preference("QueryStemming") || 0;
790 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
791 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
792 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
793 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
795 # no stemming/weight/fuzzy in NoZebra
796 if ( C4::Context->preference("NoZebra") ) {
802 my $query = $operands[0];
803 my $simple_query = $operands[0];
805 # initialize the variables we're passing back
814 my $stopwords_removed; # flag to determine if stopwords have been removed
816 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
818 if ( $query =~ /^ccl=/ ) {
819 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
821 if ( $query =~ /^cql=/ ) {
822 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
824 if ( $query =~ /^pqf=/ ) {
825 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
828 # pass nested queries directly
829 # FIXME: need better handling of some of these variables in this case
830 if ( $query =~ /(\(|\))/ ) {
832 undef, $query, $simple_query, $query_cgi,
833 $query, $limit, $limit_cgi, $limit_desc,
834 $stopwords_removed, 'ccl'
838 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
839 # query operands and indexes and add stemming, truncation, field weighting, etc.
840 # Once we do so, we'll end up with a value in $query, just like if we had an
841 # incoming $query from the user
844 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
846 ; # a flag used to keep track if there was a previous query
847 # if there was, we can apply the current operator
849 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
851 # COMBINE OPERANDS, INDEXES AND OPERATORS
852 if ( $operands[$i] ) {
854 # A flag to determine whether or not to add the index to the query
857 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
858 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
861 $remove_stopwords = 0;
863 my $operand = $operands[$i];
864 my $index = $indexes[$i];
866 # Add index-specific attributes
867 # Date of Publication
868 if ( $index eq 'yr' ) {
869 $index .= ",st-numeric";
872 $stemming, $auto_truncation,
873 $weight_fields, $fuzzy_enabled,
875 ) = ( 0, 0, 0, 0, 0 );
878 # Date of Acquisition
879 elsif ( $index eq 'acqdate' ) {
880 $index .= ",st-date-normalized";
883 $stemming, $auto_truncation,
884 $weight_fields, $fuzzy_enabled,
886 ) = ( 0, 0, 0, 0, 0 );
889 # Set default structure attribute (word list)
891 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
892 $struct_attr = ",wrdl";
895 # Some helpful index variants
896 my $index_plus = $index . $struct_attr . ":" if $index;
897 my $index_plus_comma = $index . $struct_attr . "," if $index;
900 if ($remove_stopwords) {
901 ( $operand, $stopwords_removed ) =
902 _remove_stopwords( $operand, $index );
903 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
904 warn "REMOVED STOPWORDS: @$stopwords_removed"
905 if ( $stopwords_removed && $DEBUG );
909 my ( $nontruncated, $righttruncated, $lefttruncated,
910 $rightlefttruncated, $regexpr );
911 my $truncated_operand;
913 $nontruncated, $righttruncated, $lefttruncated,
914 $rightlefttruncated, $regexpr
915 ) = _detect_truncation( $operand, $index );
917 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
922 scalar(@$righttruncated) + scalar(@$lefttruncated) +
923 scalar(@$rightlefttruncated) > 0 )
926 # Don't field weight or add the index to the query, we do it here
928 undef $weight_fields;
929 my $previous_truncation_operand;
930 if ( scalar(@$nontruncated) > 0 ) {
931 $truncated_operand .= "$index_plus @$nontruncated ";
932 $previous_truncation_operand = 1;
934 if ( scalar(@$righttruncated) > 0 ) {
935 $truncated_operand .= "and "
936 if $previous_truncation_operand;
937 $truncated_operand .=
938 "$index_plus_comma" . "rtrn:@$righttruncated ";
939 $previous_truncation_operand = 1;
941 if ( scalar(@$lefttruncated) > 0 ) {
942 $truncated_operand .= "and "
943 if $previous_truncation_operand;
944 $truncated_operand .=
945 "$index_plus_comma" . "ltrn:@$lefttruncated ";
946 $previous_truncation_operand = 1;
948 if ( scalar(@$rightlefttruncated) > 0 ) {
949 $truncated_operand .= "and "
950 if $previous_truncation_operand;
951 $truncated_operand .=
952 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
953 $previous_truncation_operand = 1;
956 $operand = $truncated_operand if $truncated_operand;
957 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
961 $stemmed_operand = _build_stemmed_operand($operand)
963 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
965 # Handle Field Weighting
966 my $weighted_operand;
968 _build_weighted_query( $operand, $stemmed_operand, $index )
970 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
971 $operand = $weighted_operand if $weight_fields;
972 $indexes_set = 1 if $weight_fields;
974 # If there's a previous operand, we need to add an operator
975 if ($previous_operand) {
977 # User-specified operator
978 if ( $operators[ $i - 1 ] ) {
979 $query .= " $operators[$i-1] ";
980 $query .= " $index_plus " unless $indexes_set;
981 $query .= " $operand";
982 $query_cgi .= "&op=$operators[$i-1]";
983 $query_cgi .= "&idx=$index" if $index;
984 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
986 " $operators[$i-1] $index_plus $operands[$i]";
989 # Default operator is and
992 $query .= "$index_plus " unless $indexes_set;
993 $query .= "$operand";
994 $query_cgi .= "&op=and&idx=$index" if $index;
995 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
996 $query_desc .= " and $index_plus $operands[$i]";
1000 # There isn't a pervious operand, don't need an operator
1003 # Field-weighted queries already have indexes set
1004 $query .= " $index_plus " unless $indexes_set;
1006 $query_desc .= " $index_plus $operands[$i]";
1007 $query_cgi .= "&idx=$index" if $index;
1008 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1009 $previous_operand = 1;
1014 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1017 my $group_OR_limits;
1018 my $availability_limit;
1019 foreach my $this_limit (@limits) {
1020 if ( $this_limit =~ /available/ ) {
1022 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1024 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1025 $availability_limit .=
1026 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1027 $limit_cgi .= "&limit=available";
1031 # group_OR_limits, prefixed by mc-
1032 # OR every member of the group
1033 elsif ( $this_limit =~ /mc/ ) {
1034 $group_OR_limits .= " or " if $group_OR_limits;
1035 $limit_desc .= " or " if $group_OR_limits;
1036 $group_OR_limits .= "$this_limit";
1037 $limit_cgi .= "&limit=$this_limit";
1038 $limit_desc .= " $this_limit";
1041 # Regular old limits
1043 $limit .= " and " if $limit || $query;
1044 $limit .= "$this_limit";
1045 $limit_cgi .= "&limit=$this_limit";
1046 $limit_desc .= " $this_limit";
1049 if ($group_OR_limits) {
1050 $limit .= " and " if ( $query || $limit );
1051 $limit .= "($group_OR_limits)";
1053 if ($availability_limit) {
1054 $limit .= " and " if ( $query || $limit );
1055 $limit .= "($availability_limit)";
1058 # Normalize the query and limit strings
1061 for ( $query, $query_desc, $limit, $limit_desc ) {
1062 $_ =~ s/ / /g; # remove extra spaces
1063 $_ =~ s/^ //g; # remove any beginning spaces
1064 $_ =~ s/ $//g; # remove any ending spaces
1065 $_ =~ s/==/=/g; # remove double == from query
1068 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1070 # append the limit to the query
1071 $query .= " " . $limit;
1075 warn "QUERY:" . $query;
1076 warn "QUERY CGI:" . $query_cgi;
1077 warn "QUERY DESC:" . $query_desc;
1078 warn "LIMIT:" . $limit;
1079 warn "LIMIT CGI:" . $limit_cgi;
1080 warn "LIMIT DESC:" . $limit_desc;
1082 warn "Leave buildQuery";
1086 undef, $query, $simple_query, $query_cgi,
1087 $query_desc, $limit, $limit_cgi, $limit_desc,
1088 $stopwords_removed, $query_type
1092 =head2 searchResults
1094 Format results in a form suitable for passing to the template
1098 # IMO this subroutine is pretty messy still -- it's responsible for
1099 # building the HTML output for the template
1101 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1102 my $dbh = C4::Context->dbh;
1107 # add search-term highlighting via <span>s on the search terms
1108 my $span_terms_hashref;
1109 for my $span_term ( split( / /, $searchdesc ) ) {
1110 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1111 $span_terms_hashref->{$span_term}++;
1114 #Build branchnames hash
1116 #get branch information.....
1119 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1120 ; # FIXME : use C4::Koha::GetBranches
1122 while ( my $bdata = $bsth->fetchrow_hashref ) {
1123 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1128 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1131 while ( my $ldata = $lsch->fetchrow_hashref ) {
1132 $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1135 #Build itemtype hash
1136 #find itemtype & itemtype image
1140 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1143 while ( my $bdata = $bsth->fetchrow_hashref ) {
1144 $itemtypes{ $bdata->{'itemtype'} }->{description} =
1145 $bdata->{'description'};
1146 $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1147 $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
1148 $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1149 $bdata->{'notforloan'};
1152 #search item field code
1155 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1158 my ($itemtag) = $sth->fetchrow;
1160 # get notforloan authorised value list
1163 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1166 my ($notforloan_authorised_value) = $sth->fetchrow;
1168 ## find column names of items related to MARC
1169 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1171 my %subfieldstosearch;
1172 while ( ( my $column ) = $sth2->fetchrow ) {
1173 my ( $tagfield, $tagsubfield ) =
1174 &GetMarcFromKohaField( "items." . $column, "" );
1175 $subfieldstosearch{$column} = $tagsubfield;
1178 # handle which records to actually retrieve
1180 if ( $hits && $offset + $results_per_page <= $hits ) {
1181 $times = $offset + $results_per_page;
1187 # loop through all of the records we've retrieved
1188 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1190 $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1191 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1192 $oldbiblio->{result_number} = $i + 1;
1194 # add imageurl to itemtype if there is one
1195 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1196 $oldbiblio->{imageurl} =
1197 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1198 $oldbiblio->{description} =
1199 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1202 $oldbiblio->{imageurl} =
1203 getitemtypeimagesrc() . "/"
1204 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1205 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1206 $oldbiblio->{description} =
1207 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1210 # Build summary if there is one (the summary is defined in the itemtypes table)
1211 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1212 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1213 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1214 my @fields = $marcrecord->fields();
1215 foreach my $field (@fields) {
1216 my $tag = $field->tag();
1217 my $tagvalue = $field->as_string();
1219 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1220 unless ( $tag < 10 ) {
1221 my @subf = $field->subfields;
1222 for my $i ( 0 .. $#subf ) {
1223 my $subfieldcode = $subf[$i][0];
1224 my $subfieldvalue = $subf[$i][1];
1225 my $tagsubf = $tag . $subfieldcode;
1227 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1232 $summary =~ s/\[(.*?)]//g;
1233 $summary =~ s/\n/<br>/g;
1234 $oldbiblio->{summary} = $summary;
1237 # Add search-term highlighting to the whole record where they match using <span>s
1238 my $searchhighlightblob;
1239 for my $highlight_field ( $marcrecord->fields ) {
1241 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1242 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1243 for my $subfield ($highlight_field->subfields()) {
1245 next if $subfield->[0] eq '9';
1246 my $field = $subfield->[1];
1247 for my $term ( keys %$span_terms_hashref ) {
1248 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1249 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1253 $searchhighlightblob .= $field . " ... " if $match;
1257 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1258 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1260 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1261 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1263 # Add search-term highlighting to the title, subtitle, etc. fields
1264 for my $term ( keys %$span_terms_hashref ) {
1265 my $old_term = $term;
1266 if ( length($term) > 3 ) {
1267 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1268 $oldbiblio->{'title'} =~
1269 s/$term/<span class=\"term\">$&<\/span>/gi;
1270 $oldbiblio->{'subtitle'} =~
1271 s/$term/<span class=\"term\">$&<\/span>/gi;
1272 $oldbiblio->{'author'} =~
1273 s/$term/<span class=\"term\">$&<\/span>/gi;
1274 $oldbiblio->{'publishercode'} =~
1275 s/$term/<span class=\"term\">$&<\/span>/gi;
1276 $oldbiblio->{'place'} =~
1277 s/$term/<span class=\"term\">$&<\/span>/gi;
1278 $oldbiblio->{'pages'} =~
1279 s/$term/<span class=\"term\">$&<\/span>/gi;
1280 $oldbiblio->{'notes'} =~
1281 s/$term/<span class=\"term\">$&<\/span>/gi;
1282 $oldbiblio->{'size'} =~
1283 s/$term/<span class=\"term\">$&<\/span>/gi;
1288 # surely there's a better way to handle this
1290 $toggle = "#ffffcc";
1295 $oldbiblio->{'toggle'} = $toggle;
1297 # Pull out the items fields
1298 my @fields = $marcrecord->field($itemtag);
1300 # Setting item statuses for display
1301 my @available_items_loop;
1302 my @onloan_items_loop;
1303 my @other_items_loop;
1305 my $available_items;
1309 my $ordered_count = 0;
1310 my $available_count = 0;
1311 my $onloan_count = 0;
1312 my $longoverdue_count = 0;
1313 my $other_count = 0;
1314 my $wthdrawn_count = 0;
1315 my $itemlost_count = 0;
1316 my $itembinding_count = 0;
1317 my $itemdamaged_count = 0;
1318 my $can_place_holds = 0;
1319 my $items_count = scalar(@fields);
1322 ( C4::Context->preference('maxItemsinSearchResults') )
1323 ? C4::Context->preference('maxItemsinSearchResults') - 1
1326 # loop through every item
1327 foreach my $field (@fields) {
1331 # populate the items hash
1332 foreach my $code ( keys %subfieldstosearch ) {
1333 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1336 # set item's branch name, use homebranch first, fall back to holdingbranch
1337 if ( $item->{'homebranch'} ) {
1338 $item->{'branchname'} = $branches{ $item->{homebranch} };
1342 elsif ( $item->{'holdingbranch'} ) {
1343 $item->{'branchname'} = $branches{ $item->{holdingbranch} };
1346 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1347 if ( $item->{onloan} ) {
1349 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{due_date} = format_date( $item->{onloan} );
1350 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{count}++ if $item->{'homebranch'};
1351 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{branchname} = $item->{'branchname'};
1352 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{location} = $locations{ $item->{location} };
1353 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{itemcallnumber} = $item->{itemcallnumber};
1354 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1355 # if something's checked out and lost, mark it as 'long overdue'
1356 if ( $item->{itemlost} ) {
1357 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1358 $longoverdue_count++;
1361 # can place holds as long as this item isn't lost
1363 $can_place_holds = 1;
1367 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1371 if ( $item->{notforloan} == -1 ) {
1375 # item is withdrawn, lost or damaged
1376 if ( $item->{wthdrawn}
1377 || $item->{itemlost}
1379 || $item->{notforloan} )
1381 $wthdrawn_count++ if $item->{wthdrawn};
1382 $itemlost_count++ if $item->{itemlost};
1383 $itemdamaged_count++ if $item->{damaged};
1384 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1387 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1388 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1389 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1390 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1391 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1392 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1393 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1394 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1395 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1400 $can_place_holds = 1;
1402 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1403 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1404 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1405 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1406 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1409 } # notforloan, item level and biblioitem level
1410 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1412 ( C4::Context->preference('maxItemsinSearchResults') )
1413 ? C4::Context->preference('maxItemsinSearchResults') - 1
1415 for my $key ( sort keys %$onloan_items ) {
1416 $onloanitemscount++;
1417 push @onloan_items_loop, $onloan_items->{$key}
1418 unless $onloanitemscount > $maxitems;
1420 for my $key ( sort keys %$other_items ) {
1422 push @other_items_loop, $other_items->{$key}
1423 unless $otheritemscount > $maxitems;
1425 for my $key ( sort keys %$available_items ) {
1426 $availableitemscount++;
1427 push @available_items_loop, $available_items->{$key}
1428 unless $availableitemscount > $maxitems;
1431 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1432 $can_place_holds = 0
1433 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1434 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1435 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1436 $oldbiblio->{items_count} = $items_count;
1437 $oldbiblio->{available_items_loop} = \@available_items_loop;
1438 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1439 $oldbiblio->{other_items_loop} = \@other_items_loop;
1440 $oldbiblio->{availablecount} = $available_count;
1441 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1442 $oldbiblio->{onloancount} = $onloan_count;
1443 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1444 $oldbiblio->{othercount} = $other_count;
1445 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1446 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1447 $oldbiblio->{itemlostcount} = $itemlost_count;
1448 $oldbiblio->{damagedcount} = $itemdamaged_count;
1449 $oldbiblio->{orderedcount} = $ordered_count;
1450 $oldbiblio->{isbn} =~
1451 s/-//g; # deleting - in isbn to enable amazon content
1452 push( @newresults, $oldbiblio );
1457 #----------------------------------------------------------------------
1459 # Non-Zebra GetRecords#
1460 #----------------------------------------------------------------------
1464 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1470 $query, $simple_query, $sort_by_ref, $servers_ref,
1471 $results_per_page, $offset, $expanded_facet, $branches,
1474 warn "query =$query" if $DEBUG;
1475 my $result = NZanalyse($query);
1476 warn "results =$result" if $DEBUG;
1478 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1484 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1485 the list is built from an inverted index in the nozebra SQL table
1486 note that title is here only for convenience : the sorting will be very fast when requested on title
1487 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1492 my ( $string, $server ) = @_;
1493 # warn "---------" if $DEBUG;
1494 warn " NZanalyse" if $DEBUG;
1495 # warn "---------" if $DEBUG;
1497 # $server contains biblioserver or authorities, depending on what we search on.
1498 #warn "querying : $string on $server";
1499 $server = 'biblioserver' unless $server;
1501 # if we have a ", replace the content to discard temporarily any and/or/not inside
1503 if ( $string =~ /"/ ) {
1504 $string =~ s/"(.*?)"/__X__/;
1506 warn "commacontent : $commacontent" if $DEBUG;
1509 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1510 # then, call again NZanalyse with $left and $right
1511 # (recursive until we find a leaf (=> something without and/or/not)
1512 # delete repeated operator... Would then go in infinite loop
1513 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1516 #process parenthesis before.
1517 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1520 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1522 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1524 my $leftresult = NZanalyse( $left, $server );
1526 my $rightresult = NZanalyse( $right, $server );
1528 # OK, we have the results for right and left part of the query
1529 # depending of operand, intersect, union or exclude both lists
1530 # to get a result list
1531 if ( $operator eq ' and ' ) {
1532 return NZoperatorAND($leftresult,$rightresult);
1534 elsif ( $operator eq ' or ' ) {
1536 # just merge the 2 strings
1537 return $leftresult . $rightresult;
1539 elsif ( $operator eq ' not ' ) {
1540 return NZoperatorNOT($leftresult,$rightresult);
1544 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1548 warn "string :" . $string if $DEBUG;
1549 $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1552 my $operator = lc($2); # FIXME: and/or/not are operators, not operands
1553 warn "no parenthesis. left : $left operator: $operator right: $right"
1556 # it's not a leaf, we have a and/or/not
1559 # reintroduce comma content if needed
1560 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1561 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1562 warn "node : $left / $operator / $right\n" if $DEBUG;
1563 my $leftresult = NZanalyse( $left, $server );
1564 my $rightresult = NZanalyse( $right, $server );
1565 warn " leftresult : $leftresult" if $DEBUG;
1566 warn " rightresult : $rightresult" if $DEBUG;
1567 # OK, we have the results for right and left part of the query
1568 # depending of operand, intersect, union or exclude both lists
1569 # to get a result list
1570 if ( $operator eq ' and ' ) {
1572 return NZoperatorAND($leftresult,$rightresult);
1574 elsif ( $operator eq ' or ' ) {
1576 # just merge the 2 strings
1577 return $leftresult . $rightresult;
1579 elsif ( $operator eq ' not ' ) {
1580 return NZoperatorNOT($leftresult,$rightresult);
1584 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1585 die "error : operand unknown : $operator for $string";
1588 # it's a leaf, do the real SQL query and return the result
1591 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1592 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1593 warn "leaf:$string" if $DEBUG;
1595 # parse the string in in operator/operand/value again
1596 $string =~ /(.*)(>=|<=)(.*)/;
1600 # warn "handling leaf... left:$left operator:$operator right:$right"
1602 unless ($operator) {
1603 $string =~ /(.*)(>|<|=)(.*)/;
1608 # "handling unless (operator)... left:$left operator:$operator right:$right"
1613 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1614 $left =~ s/[, ].*$//;
1616 # automatic replace for short operators
1617 $left = 'title' if $left =~ '^ti$';
1618 $left = 'author' if $left =~ '^au$';
1619 $left = 'publisher' if $left =~ '^pb$';
1620 $left = 'subject' if $left =~ '^su$';
1621 $left = 'koha-Auth-Number' if $left =~ '^an$';
1622 $left = 'keyword' if $left =~ '^kw$';
1623 warn "handling leaf... left:$left operator:$operator right:$right";
1624 if ( $operator && $left ne 'keyword' ) {
1626 #do a specific search
1627 my $dbh = C4::Context->dbh;
1628 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1631 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1633 warn "$left / $operator / $right\n";
1635 # split each word, query the DB and build the biblionumbers result
1636 #sanitizing leftpart
1637 $left =~ s/^\s+|\s+$//;
1638 foreach ( split / /, $right ) {
1640 $_ =~ s/^\s+|\s+$//;
1642 warn "EXECUTE : $server, $left, $_";
1643 $sth->execute( $server, $left, $_ )
1644 or warn "execute failed: $!";
1645 while ( my ( $line, $value ) = $sth->fetchrow ) {
1647 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1648 # otherwise, fill the result
1649 $biblionumbers .= $line
1650 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1651 warn "result : $value "
1652 . ( $right =~ /\d/ ) . "=="
1653 . ( $value =~ /\D/?$line:"" ); #= $line";
1656 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1659 $results = NZoperatorAND($biblionumbers,$results);
1662 $results = $biblionumbers;
1668 #do a complete search (all indexes), if index='kw' do complete search too.
1669 my $dbh = C4::Context->dbh;
1672 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1675 # split each word, query the DB and build the biblionumbers result
1676 foreach ( split / /, $string ) {
1677 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1678 warn "search on all indexes on $_" if $DEBUG;
1681 $sth->execute( $server, $_ );
1682 while ( my $line = $sth->fetchrow ) {
1683 $biblionumbers .= $line;
1686 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1688 $results = NZoperatorAND($biblionumbers,$results);
1691 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1692 $results = $biblionumbers;
1696 warn "return : $results for LEAF : $string" if $DEBUG;
1699 warn "---------" if $DEBUG;
1700 warn "Leave NZanalyse" if $DEBUG;
1701 warn "---------" if $DEBUG;
1705 my ($rightresult, $leftresult)=@_;
1707 my @leftresult = split /;/, $leftresult;
1708 warn " @leftresult / $rightresult \n" if $DEBUG;
1710 # my @rightresult = split /;/,$leftresult;
1713 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1714 # the result is stored twice, to have the same weight for AND than OR.
1715 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1716 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1717 foreach (@leftresult) {
1720 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1721 if ( $rightresult =~ /$value-(\d+);/ ) {
1722 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1724 "$value-$countvalue;$value-$countvalue;";
1727 warn " $finalresult \n" if $DEBUG;
1728 return $finalresult;
1732 my ($rightresult, $leftresult)=@_;
1733 return $rightresult.$leftresult;
1737 my ($rightresult, $leftresult)=@_;
1739 my @leftresult = split /;/, $leftresult;
1741 # my @rightresult = split /;/,$leftresult;
1743 foreach (@leftresult) {
1745 $value=$1 if $value=~m/(.*)-\d+$/;
1746 unless ($rightresult =~ "$value-") {
1747 $finalresult .= "$_;";
1750 return $finalresult;
1755 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1762 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1763 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1765 # order title asc by default
1766 # $ordering = '1=36 <i' unless $ordering;
1767 $results_per_page = 20 unless $results_per_page;
1768 $offset = 0 unless $offset;
1769 my $dbh = C4::Context->dbh;
1772 # order by POPULARITY
1774 if ( $ordering =~ /popularity/ ) {
1778 # popularity is not in MARC record, it's builded from a specific query
1780 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1781 foreach ( split /;/, $biblionumbers ) {
1782 my ( $biblionumber, $title ) = split /,/, $_;
1783 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1784 $sth->execute($biblionumber);
1785 my $popularity = $sth->fetchrow || 0;
1787 # hint : the key is popularity.title because we can have
1788 # many results with the same popularity. In this cas, sub-ordering is done by title
1789 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1790 # (un-frequent, I agree, but we won't forget anything that way ;-)
1791 $popularity{ sprintf( "%10d", $popularity ) . $title
1792 . $biblionumber } = $biblionumber;
1795 # sort the hash and return the same structure as GetRecords (Zebra querying)
1798 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1799 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1800 $result_hash->{'RECORDS'}[ $numbers++ ] =
1801 $result{ $popularity{$key} }->as_usmarc();
1804 else { # sort popularity ASC
1805 foreach my $key ( sort ( keys %popularity ) ) {
1806 $result_hash->{'RECORDS'}[ $numbers++ ] =
1807 $result{ $popularity{$key} }->as_usmarc();
1810 my $finalresult = ();
1811 $result_hash->{'hits'} = $numbers;
1812 $finalresult->{'biblioserver'} = $result_hash;
1813 return $finalresult;
1819 elsif ( $ordering =~ /author/ ) {
1821 foreach ( split /;/, $biblionumbers ) {
1822 my ( $biblionumber, $title ) = split /,/, $_;
1823 my $record = GetMarcBiblio($biblionumber);
1825 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1826 $author = $record->subfield( '200', 'f' );
1827 $author = $record->subfield( '700', 'a' ) unless $author;
1830 $author = $record->subfield( '100', 'a' );
1833 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1834 # and we don't want to get only 1 result for each of them !!!
1835 $result{ $author . $biblionumber } = $record;
1838 # sort the hash and return the same structure as GetRecords (Zebra querying)
1841 if ( $ordering eq 'author_za' ) { # sort by author desc
1842 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1843 $result_hash->{'RECORDS'}[ $numbers++ ] =
1844 $result{$key}->as_usmarc();
1847 else { # sort by author ASC
1848 foreach my $key ( sort ( keys %result ) ) {
1849 $result_hash->{'RECORDS'}[ $numbers++ ] =
1850 $result{$key}->as_usmarc();
1853 my $finalresult = ();
1854 $result_hash->{'hits'} = $numbers;
1855 $finalresult->{'biblioserver'} = $result_hash;
1856 return $finalresult;
1859 # ORDER BY callnumber
1862 elsif ( $ordering =~ /callnumber/ ) {
1864 foreach ( split /;/, $biblionumbers ) {
1865 my ( $biblionumber, $title ) = split /,/, $_;
1866 my $record = GetMarcBiblio($biblionumber);
1868 my ( $callnumber_tag, $callnumber_subfield ) =
1869 GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1870 ( $callnumber_tag, $callnumber_subfield ) =
1871 GetMarcFromKohaField('biblioitems.callnumber')
1872 unless $callnumber_tag;
1873 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1874 $callnumber = $record->subfield( '200', 'f' );
1877 $callnumber = $record->subfield( '100', 'a' );
1880 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1881 # and we don't want to get only 1 result for each of them !!!
1882 $result{ $callnumber . $biblionumber } = $record;
1885 # sort the hash and return the same structure as GetRecords (Zebra querying)
1888 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1889 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1890 $result_hash->{'RECORDS'}[ $numbers++ ] =
1891 $result{$key}->as_usmarc();
1894 else { # sort by title ASC
1895 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1896 $result_hash->{'RECORDS'}[ $numbers++ ] =
1897 $result{$key}->as_usmarc();
1900 my $finalresult = ();
1901 $result_hash->{'hits'} = $numbers;
1902 $finalresult->{'biblioserver'} = $result_hash;
1903 return $finalresult;
1905 elsif ( $ordering =~ /pubdate/ ) { #pub year
1907 foreach ( split /;/, $biblionumbers ) {
1908 my ( $biblionumber, $title ) = split /,/, $_;
1909 my $record = GetMarcBiblio($biblionumber);
1910 my ( $publicationyear_tag, $publicationyear_subfield ) =
1911 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1912 my $publicationyear =
1913 $record->subfield( $publicationyear_tag,
1914 $publicationyear_subfield );
1916 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1917 # and we don't want to get only 1 result for each of them !!!
1918 $result{ $publicationyear . $biblionumber } = $record;
1921 # sort the hash and return the same structure as GetRecords (Zebra querying)
1924 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1925 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1926 $result_hash->{'RECORDS'}[ $numbers++ ] =
1927 $result{$key}->as_usmarc();
1930 else { # sort by pub year ASC
1931 foreach my $key ( sort ( keys %result ) ) {
1932 $result_hash->{'RECORDS'}[ $numbers++ ] =
1933 $result{$key}->as_usmarc();
1936 my $finalresult = ();
1937 $result_hash->{'hits'} = $numbers;
1938 $finalresult->{'biblioserver'} = $result_hash;
1939 return $finalresult;
1945 elsif ( $ordering =~ /title/ ) {
1947 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1949 foreach ( split /;/, $biblionumbers ) {
1950 my ( $biblionumber, $title ) = split /,/, $_;
1952 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1953 # and we don't want to get only 1 result for each of them !!!
1954 # hint & speed improvement : we can order without reading the record
1955 # so order, and read records only for the requested page !
1956 $result{ $title . $biblionumber } = $biblionumber;
1959 # sort the hash and return the same structure as GetRecords (Zebra querying)
1962 if ( $ordering eq 'title_az' ) { # sort by title desc
1963 foreach my $key ( sort ( keys %result ) ) {
1964 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1967 else { # sort by title ASC
1968 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1969 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1973 # limit the $results_per_page to result size if it's more
1974 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
1976 # for the requested page, replace biblionumber by the complete record
1977 # speed improvement : avoid reading too much things
1979 my $counter = $offset ;
1980 $counter <= $offset + $results_per_page ;
1984 $result_hash->{'RECORDS'}[$counter] =
1985 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
1987 my $finalresult = ();
1988 $result_hash->{'hits'} = $numbers;
1989 $finalresult->{'biblioserver'} = $result_hash;
1990 return $finalresult;
1997 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2000 foreach ( split /;/, $biblionumbers ) {
2001 my ( $biblionumber, $title ) = split /,/, $_;
2002 $title =~ /(.*)-(\d)/;
2007 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2008 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2009 # biblio N has ranking = 6
2010 $count_ranking{$biblionumber} += $ranking;
2013 # build the result by "inverting" the count_ranking hash
2014 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2016 foreach ( keys %count_ranking ) {
2017 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2020 # sort the hash and return the same structure as GetRecords (Zebra querying)
2023 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2024 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2027 # limit the $results_per_page to result size if it's more
2028 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2030 # for the requested page, replace biblionumber by the complete record
2031 # speed improvement : avoid reading too much things
2033 my $counter = $offset ;
2034 $counter <= $offset + $results_per_page ;
2038 $result_hash->{'RECORDS'}[$counter] =
2039 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2040 if $result_hash->{'RECORDS'}[$counter];
2042 my $finalresult = ();
2043 $result_hash->{'hits'} = $numbers;
2044 $finalresult->{'biblioserver'} = $result_hash;
2045 return $finalresult;
2051 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2053 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2054 test parameter if set donot perform change to records in database.
2060 * $listbiblios is an array ref to marcrecords to be changed
2061 * $tagsubfield is the reference of the subfield to change.
2062 * $initvalue is the value to search the record for
2063 * $targetvalue is the value to set the subfield to
2064 * $test is to be set only not to perform changes in database.
2066 =item C<Output arg:>
2067 * $countchanged counts all the changes performed.
2068 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2070 =item C<usage in the script:>
2074 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2075 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2076 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2081 my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2084 my ( $tag, $subfield ) = ( $1, $2 )
2085 if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2086 if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2087 $tag = $tag . $subfield;
2090 my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber');
2091 my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2092 if ($tag eq $itemtag) {
2093 # do not allow the embedded item tag to be
2095 warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2098 foreach my $usmarc (@$listbiblios) {
2100 $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2104 # usmarc is not a valid usmarc May be a biblionumber
2105 # FIXME - sorry, please let's figure out whether
2106 # this function is to be passed a list of
2107 # record numbers or a list of MARC::Record
2108 # objects. The former is probably better
2109 # because the MARC records supplied by Zebra
2110 # may be not current.
2111 $record = GetMarcBiblio($usmarc);
2112 $biblionumber = $usmarc;
2115 if ( $bntag >= 010 ) {
2116 $biblionumber = $record->subfield( $bntag, $bnsubf );
2119 $biblionumber = $record->field($bntag)->data;
2123 #GetBiblionumber is to be written.
2124 #Could be replaced by TransformMarcToKoha (But Would be longer)
2125 if ( $record->field($tag) ) {
2127 foreach my $field ( $record->field($tag) ) {
2130 $field->delete_subfield(
2131 'code' => $subfield,
2132 'match' => qr($initvalue)
2138 $field->update( $subfield, $targetvalue )
2143 if ( $tag >= 010 ) {
2144 if ( $field->delete_field($field) ) {
2150 $field->data = $targetvalue
2151 if ( $field->data =~ qr($initvalue) );
2156 # warn $record->as_formatted;
2158 ModBiblio( $record, $biblionumber,
2159 GetFrameworkCode($biblionumber) )
2163 push @unmatched, $biblionumber;
2167 push @unmatched, $biblionumber;
2170 return ( $countmatched, \@unmatched );
2173 END { } # module clean-up code here (global destructor)
2180 Koha Developement team <info@koha.org>