3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Dates qw(format_date);
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
28 # set the version for version checking
31 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
36 C4::Search - Functions for searching the Koha catalog.
40 See opac/opac-search.pl or catalogue/search.pl for example of usage
44 This module provides searching functions for Koha's bibliographic databases
62 # make all your functions, whether exported or not;
64 =head2 findseealso($dbh,$fields);
66 C<$dbh> is a link to the DB handler.
69 my $dbh =C4::Context->dbh;
71 C<$fields> is a reference to the fields array
73 This function modifies the @$fields array and adds related fields to search on.
75 FIXME: this function is probably deprecated in Koha 3
80 my ( $dbh, $fields ) = @_;
81 my $tagslib = GetMarcStructure(1);
82 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
83 my ($tag) = substr( @$fields[$i], 1, 3 );
84 my ($subfield) = substr( @$fields[$i], 4, 1 );
85 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
86 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
100 my $dbh = C4::Context->dbh;
101 my $result = TransformMarcToKoha( $dbh, $record, '' );
106 my ( $biblionumber, $title );
108 # search duplicate on ISBN, easy and fast..
109 # ... normalize first
110 if ( $result->{isbn} ) {
111 $result->{isbn} =~ s/\(.*$//;
112 $result->{isbn} =~ s/\s+$//;
113 $query = "isbn=$result->{isbn}";
116 $result->{title} =~ s /\\//g;
117 $result->{title} =~ s /\"//g;
118 $result->{title} =~ s /\(//g;
119 $result->{title} =~ s /\)//g;
121 # FIXME: instead of removing operators, could just do
122 # quotes around the value
123 $result->{title} =~ s/(and|or|not)//g;
124 $query = "ti,ext=$result->{title}";
125 $query .= " and itemtype=$result->{itemtype}"
126 if ( $result->{itemtype} );
127 if ( $result->{author} ) {
128 $result->{author} =~ s /\\//g;
129 $result->{author} =~ s /\"//g;
130 $result->{author} =~ s /\(//g;
131 $result->{author} =~ s /\)//g;
133 # remove valid operators
134 $result->{author} =~ s/(and|or|not)//g;
135 $query .= " and au,ext=$result->{author}";
139 # FIXME: add error handling
140 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
142 foreach my $possible_duplicate_record (@$searchresults) {
144 MARC::Record->new_from_usmarc($possible_duplicate_record);
145 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
147 # FIXME :: why 2 $biblionumber ?
149 push @results, $result->{'biblionumber'};
150 push @results, $result->{'title'};
158 ($error,$results) = SimpleSearch($query,@servers);
160 This function provides a simple search API on the bibliographic catalog
166 * $query can be a simple keyword or a complete CCL query
167 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
170 * $error is a empty unless an error is detected
171 * \@results is an array of records.
173 =item C<usage in the script:>
177 my ($error, $marcresults) = SimpleSearch($query);
179 if (defined $error) {
180 $template->param(query_error => $error);
181 warn "error: ".$error;
182 output_html_with_http_headers $input, $cookie, $template->output;
186 my $hits = scalar @$marcresults;
189 for(my $i=0;$i<$hits;$i++) {
191 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
192 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
194 #build the hash for the template.
195 $resultsloop{highlight} = ($i % 2)?(1):(0);
196 $resultsloop{title} = $biblio->{'title'};
197 $resultsloop{subtitle} = $biblio->{'subtitle'};
198 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
199 $resultsloop{author} = $biblio->{'author'};
200 $resultsloop{publishercode} = $biblio->{'publishercode'};
201 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
203 push @results, \%resultsloop;
206 $template->param(result=>\@results);
212 if ( C4::Context->preference('NoZebra') ) {
213 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
216 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
217 return ( undef, $search_result );
224 return ( "No query entered", undef ) unless $query;
226 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
227 @servers = ("biblioserver") unless @servers;
229 # Initialize & Search Zebra
230 for ( my $i = 0 ; $i < @servers ; $i++ ) {
232 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
235 ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
239 $zconns[$i]->errmsg() . " ("
240 . $zconns[$i]->errcode() . ") "
241 . $zconns[$i]->addinfo() . " "
242 . $zconns[$i]->diagset();
244 return ( $error, undef ) if $zconns[$i]->errcode();
248 # caught a ZOOM::Exception
252 . $@->addinfo() . " "
255 return ( $error, undef );
260 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
261 $ev = $zconns[ $i - 1 ]->last_event();
262 if ( $ev == ZOOM::Event::ZEND ) {
263 $hits = $tmpresults[ $i - 1 ]->size();
266 for ( my $j = 0 ; $j < $hits ; $j++ ) {
267 my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
268 push @results, $record;
273 return ( undef, \@results );
279 ( undef, $results_hashref, \@facets_loop ) = getRecords (
281 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
282 $results_per_page, $offset, $expanded_facet, $branches,
286 The all singing, all dancing, multi-server, asynchronous, scanning,
287 searching, record nabbing, facet-building
289 See verbse embedded documentation.
295 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
296 $results_per_page, $offset, $expanded_facet, $branches,
300 my @servers = @$servers_ref;
301 my @sort_by = @$sort_by_ref;
303 # Initialize variables for the ZOOM connection and results object
307 my $results_hashref = ();
309 # Initialize variables for the faceted results objects
310 my $facets_counter = ();
311 my $facets_info = ();
312 my $facets = getFacets();
315 ; # stores the ref to array of hashes for template facets loop
317 ### LOOP THROUGH THE SERVERS
318 for ( my $i = 0 ; $i < @servers ; $i++ ) {
319 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
321 # perform the search, create the results objects
322 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
324 if ( $servers[$i] =~ /biblioserver/ ) {
325 $query_to_use = $koha_query;
328 $query_to_use = $simple_query;
331 #$query_to_use = $simple_query if $scan;
332 warn $simple_query if ( $scan and $DEBUG );
334 # Check if we've got a query_type defined, if so, use it
338 if ( $query_type =~ /^ccl/ ) {
340 s/\:/\=/g; # change : to = last minute (FIXME)
343 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
346 elsif ( $query_type =~ /^cql/ ) {
349 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
351 elsif ( $query_type =~ /^pqf/ ) {
354 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
361 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
367 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
373 warn "WARNING: query problem with $query_to_use " . $@;
376 # Concatenate the sort_by limits and pass them to the results object
377 # Note: sort will override rank
379 foreach my $sort (@sort_by) {
380 if ( $sort eq "author_az" ) {
381 $sort_by .= "1=1003 <i ";
383 elsif ( $sort eq "author_za" ) {
384 $sort_by .= "1=1003 >i ";
386 elsif ( $sort eq "popularity_asc" ) {
387 $sort_by .= "1=9003 <i ";
389 elsif ( $sort eq "popularity_dsc" ) {
390 $sort_by .= "1=9003 >i ";
392 elsif ( $sort eq "call_number_asc" ) {
393 $sort_by .= "1=20 <i ";
395 elsif ( $sort eq "call_number_dsc" ) {
396 $sort_by .= "1=20 >i ";
398 elsif ( $sort eq "pubdate_asc" ) {
399 $sort_by .= "1=31 <i ";
401 elsif ( $sort eq "pubdate_dsc" ) {
402 $sort_by .= "1=31 >i ";
404 elsif ( $sort eq "acqdate_asc" ) {
405 $sort_by .= "1=32 <i ";
407 elsif ( $sort eq "acqdate_dsc" ) {
408 $sort_by .= "1=32 >i ";
410 elsif ( $sort eq "title_az" ) {
411 $sort_by .= "1=4 <i ";
413 elsif ( $sort eq "title_za" ) {
414 $sort_by .= "1=4 >i ";
418 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
419 warn "WARNING sort $sort_by failed";
422 } # finished looping through servers
424 # The big moment: asynchronously retrieve results from all servers
425 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
426 my $ev = $zconns[ $i - 1 ]->last_event();
427 if ( $ev == ZOOM::Event::ZEND ) {
428 next unless $results[ $i - 1 ];
429 my $size = $results[ $i - 1 ]->size();
433 # loop through the results
434 $results_hash->{'hits'} = $size;
436 if ( $offset + $results_per_page <= $size ) {
437 $times = $offset + $results_per_page;
442 for ( my $j = $offset ; $j < $times ; $j++ ) {
447 ## Check if it's an index scan
449 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
451 # here we create a minimal MARC record and hand it off to the
452 # template just like a normal result ... perhaps not ideal, but
454 my $tmprecord = MARC::Record->new();
455 $tmprecord->encoding('UTF-8');
459 # the minimal record in author/title (depending on MARC flavour)
460 if ( C4::Context->preference("marcflavour") eq
463 $tmptitle = MARC::Field->new(
471 MARC::Field->new( '245', ' ', ' ', a => $term, );
473 MARC::Field->new( '100', ' ', ' ', a => $occ, );
475 $tmprecord->append_fields($tmptitle);
476 $tmprecord->append_fields($tmpauthor);
477 $results_hash->{'RECORDS'}[$j] =
478 $tmprecord->as_usmarc();
483 $record = $results[ $i - 1 ]->record($j)->raw();
485 # warn "RECORD $j:".$record;
486 $results_hash->{'RECORDS'}[$j] = $record;
488 # Fill the facets while we're looping, but only for the biblioserver
489 $facet_record = MARC::Record->new_from_usmarc($record)
490 if $servers[ $i - 1 ] =~ /biblioserver/;
492 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
494 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
496 if ( $facets->[$k] ) {
498 for my $tag ( @{ $facets->[$k]->{'tags'} } )
501 $facet_record->field($tag);
503 for my $field (@fields) {
504 my @subfields = $field->subfields();
505 for my $subfield (@subfields) {
506 my ( $code, $data ) = @$subfield;
508 $facets->[$k]->{'subfield'} )
510 $facets_counter->{ $facets->[$k]
516 $facets_info->{ $facets->[$k]
517 ->{'link_value'} }->{'label_value'} =
518 $facets->[$k]->{'label_value'};
519 $facets_info->{ $facets->[$k]
520 ->{'link_value'} }->{'expanded'} =
521 $facets->[$k]->{'expanded'};
527 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
530 # warn "connection ", $i-1, ": $size hits";
531 # warn $results[$i-1]->record(0)->render() if $size > 0;
534 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
536 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
537 keys %$facets_counter )
540 my $number_of_facets;
541 my @this_facets_array;
544 $facets_counter->{$link_value}
545 ->{$b} <=> $facets_counter->{$link_value}->{$a}
546 } keys %{ $facets_counter->{$link_value} }
550 if ( ( $number_of_facets < 6 )
551 || ( $expanded_facet eq $link_value )
552 || ( $facets_info->{$link_value}->{'expanded'} ) )
555 # Sanitize the link value ), ( will cause errors with CCL,
556 my $facet_link_value = $one_facet;
557 $facet_link_value =~ s/(\(|\))/ /g;
559 # fix the length that will display in the label,
560 my $facet_label_value = $one_facet;
562 substr( $one_facet, 0, 20 ) . "..."
563 unless length($facet_label_value) <= 20;
565 # if it's a branch, label by the name, not the code,
566 if ( $link_value =~ /branch/ ) {
568 $branches->{$one_facet}->{'branchname'};
571 # but we're down with the whole label being in the link's title.
572 my $facet_title_value = $one_facet;
574 push @this_facets_array,
578 $facets_counter->{$link_value}
580 facet_label_value => $facet_label_value,
581 facet_title_value => $facet_title_value,
582 facet_link_value => $facet_link_value,
583 type_link_value => $link_value,
589 # handle expanded option
590 unless ( $facets_info->{$link_value}->{'expanded'} ) {
592 if ( ( $number_of_facets > 6 )
593 && ( $expanded_facet ne $link_value ) );
598 type_link_value => $link_value,
599 type_id => $link_value . "_id",
601 $facets_info->{$link_value}->{'label_value'},
602 facets => \@this_facets_array,
603 expandable => $expandable,
604 expand => $link_value,
611 return ( undef, $results_hashref, \@facets_loop );
615 sub _remove_stopwords {
616 my ( $operand, $index ) = @_;
617 my @stopwords_removed;
619 # phrase and exact-qualified indexes shouldn't have stopwords removed
620 if ( $index !~ m/phr|ext/ ) {
622 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
623 # we use IsAlpha unicode definition, to deal correctly with diacritics.
624 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
625 # is a stopword, we'd get "çon" and wouldn't find anything...
626 foreach ( keys %{ C4::Context->stopwords } ) {
627 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
629 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
631 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
632 $operand =~ s/^$_\P{IsAlpha}/ /gi;
633 $operand =~ s/\P{IsAlpha}$_$/ /gi;
634 push @stopwords_removed, $_;
638 return ( $operand, \@stopwords_removed );
642 sub _detect_truncation {
643 my ( $operand, $index ) = @_;
644 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
647 my @wordlist = split( /\s/, $operand );
648 foreach my $word (@wordlist) {
649 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
650 push @rightlefttruncated, $word;
652 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
653 push @lefttruncated, $word;
655 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
656 push @righttruncated, $word;
658 elsif ( index( $word, "*" ) < 0 ) {
659 push @nontruncated, $word;
662 push @regexpr, $word;
666 \@nontruncated, \@righttruncated, \@lefttruncated,
667 \@rightlefttruncated, \@regexpr
672 sub _build_stemmed_operand {
676 # FIXME: the locale should be set based on the user's language and/or search choice
677 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
679 # FIXME: these should be stored in the db so the librarian can modify the behavior
680 $stemmer->add_exceptions(
687 my @words = split( / /, $operand );
688 my $stems = $stemmer->stem(@words);
689 for my $stem (@$stems) {
690 $stemmed_operand .= "$stem";
691 $stemmed_operand .= "?"
692 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
693 $stemmed_operand .= " ";
695 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
696 return $stemmed_operand;
700 sub _build_weighted_query {
702 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
703 # pretty well but could work much better if we had a smarter query parser
704 my ( $operand, $stemmed_operand, $index ) = @_;
705 my $stemming = C4::Context->preference("QueryStemming") || 0;
706 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
707 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
709 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
711 # Keyword, or, no index specified
712 if ( ( $index eq 'kw' ) || ( !$index ) ) {
714 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
715 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
716 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
717 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
718 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
719 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
720 if $fuzzy_enabled; # add fuzzy, word list
721 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
722 if ( $stemming and $stemmed_operand )
723 ; # add stemming, right truncation
724 $weighted_query .= " or wrdl,r9=\"$operand\"";
726 # embedded sorting: 0 a-z; 1 z-a
727 # $weighted_query .= ") or (sort1,aut=1";
730 # Barcode searches should skip this process
731 elsif ( $index eq 'bc' ) {
732 $weighted_query .= "bc=\"$operand\"";
735 # Authority-number searches should skip this process
736 elsif ( $index eq 'an' ) {
737 $weighted_query .= "an=\"$operand\"";
740 # If the index already has more than one qualifier, wrap the operand
741 # in quotes and pass it back (assumption is that the user knows what they
742 # are doing and won't appreciate us mucking up their query
743 elsif ( $index =~ ',' ) {
744 $weighted_query .= " $index=\"$operand\"";
747 #TODO: build better cases based on specific search indexes
749 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
750 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
751 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
753 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
756 $weighted_query .= "))"; # close rank specification
757 return $weighted_query;
763 $simple_query, $query_cgi,
765 $limit_cgi, $limit_desc,
766 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
768 Build queries and limits in CCL, CGI, Human,
769 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
771 See verbose embedded documentation.
777 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
779 warn "---------" if $DEBUG;
780 warn "Enter buildQuery" if $DEBUG;
781 warn "---------" if $DEBUG;
784 my @operators = @$operators if $operators;
785 my @indexes = @$indexes if $indexes;
786 my @operands = @$operands if $operands;
787 my @limits = @$limits if $limits;
788 my @sort_by = @$sort_by if $sort_by;
790 my $stemming = C4::Context->preference("QueryStemming") || 0;
791 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
792 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
793 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
794 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
796 # no stemming/weight/fuzzy in NoZebra
797 if ( C4::Context->preference("NoZebra") ) {
803 my $query = $operands[0];
804 my $simple_query = $operands[0];
806 # initialize the variables we're passing back
815 my $stopwords_removed; # flag to determine if stopwords have been removed
817 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
819 if ( $query =~ /^ccl=/ ) {
820 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
822 if ( $query =~ /^cql=/ ) {
823 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
825 if ( $query =~ /^pqf=/ ) {
826 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
829 # pass nested queries directly
830 # FIXME: need better handling of some of these variables in this case
831 if ( $query =~ /(\(|\))/ ) {
833 undef, $query, $simple_query, $query_cgi,
834 $query, $limit, $limit_cgi, $limit_desc,
835 $stopwords_removed, 'ccl'
839 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
840 # query operands and indexes and add stemming, truncation, field weighting, etc.
841 # Once we do so, we'll end up with a value in $query, just like if we had an
842 # incoming $query from the user
845 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
847 ; # a flag used to keep track if there was a previous query
848 # if there was, we can apply the current operator
850 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
852 # COMBINE OPERANDS, INDEXES AND OPERATORS
853 if ( $operands[$i] ) {
855 # A flag to determine whether or not to add the index to the query
858 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
859 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
862 $remove_stopwords = 0;
864 my $operand = $operands[$i];
865 my $index = $indexes[$i];
867 # Add index-specific attributes
868 # Date of Publication
869 if ( $index eq 'yr' ) {
870 $index .= ",st-numeric";
873 $stemming, $auto_truncation,
874 $weight_fields, $fuzzy_enabled,
876 ) = ( 0, 0, 0, 0, 0 );
879 # Date of Acquisition
880 elsif ( $index eq 'acqdate' ) {
881 $index .= ",st-date-normalized";
884 $stemming, $auto_truncation,
885 $weight_fields, $fuzzy_enabled,
887 ) = ( 0, 0, 0, 0, 0 );
890 # Set default structure attribute (word list)
892 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
893 $struct_attr = ",wrdl";
896 # Some helpful index variants
897 my $index_plus = $index . $struct_attr . ":" if $index;
898 my $index_plus_comma = $index . $struct_attr . "," if $index;
901 if ($remove_stopwords) {
902 ( $operand, $stopwords_removed ) =
903 _remove_stopwords( $operand, $index );
904 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
905 warn "REMOVED STOPWORDS: @$stopwords_removed"
906 if ( $stopwords_removed && $DEBUG );
910 my ( $nontruncated, $righttruncated, $lefttruncated,
911 $rightlefttruncated, $regexpr );
912 my $truncated_operand;
914 $nontruncated, $righttruncated, $lefttruncated,
915 $rightlefttruncated, $regexpr
916 ) = _detect_truncation( $operand, $index );
918 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
923 scalar(@$righttruncated) + scalar(@$lefttruncated) +
924 scalar(@$rightlefttruncated) > 0 )
927 # Don't field weight or add the index to the query, we do it here
929 undef $weight_fields;
930 my $previous_truncation_operand;
931 if ( scalar(@$nontruncated) > 0 ) {
932 $truncated_operand .= "$index_plus @$nontruncated ";
933 $previous_truncation_operand = 1;
935 if ( scalar(@$righttruncated) > 0 ) {
936 $truncated_operand .= "and "
937 if $previous_truncation_operand;
938 $truncated_operand .=
939 "$index_plus_comma" . "rtrn:@$righttruncated ";
940 $previous_truncation_operand = 1;
942 if ( scalar(@$lefttruncated) > 0 ) {
943 $truncated_operand .= "and "
944 if $previous_truncation_operand;
945 $truncated_operand .=
946 "$index_plus_comma" . "ltrn:@$lefttruncated ";
947 $previous_truncation_operand = 1;
949 if ( scalar(@$rightlefttruncated) > 0 ) {
950 $truncated_operand .= "and "
951 if $previous_truncation_operand;
952 $truncated_operand .=
953 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
954 $previous_truncation_operand = 1;
957 $operand = $truncated_operand if $truncated_operand;
958 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
962 $stemmed_operand = _build_stemmed_operand($operand)
964 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
966 # Handle Field Weighting
967 my $weighted_operand;
969 _build_weighted_query( $operand, $stemmed_operand, $index )
971 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
972 $operand = $weighted_operand if $weight_fields;
973 $indexes_set = 1 if $weight_fields;
975 # If there's a previous operand, we need to add an operator
976 if ($previous_operand) {
978 # User-specified operator
979 if ( $operators[ $i - 1 ] ) {
980 $query .= " $operators[$i-1] ";
981 $query .= " $index_plus " unless $indexes_set;
982 $query .= " $operand";
983 $query_cgi .= "&op=$operators[$i-1]";
984 $query_cgi .= "&idx=$index" if $index;
985 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
987 " $operators[$i-1] $index_plus $operands[$i]";
990 # Default operator is and
993 $query .= "$index_plus " unless $indexes_set;
994 $query .= "$operand";
995 $query_cgi .= "&op=and&idx=$index" if $index;
996 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
997 $query_desc .= " and $index_plus $operands[$i]";
1001 # There isn't a pervious operand, don't need an operator
1004 # Field-weighted queries already have indexes set
1005 $query .= " $index_plus " unless $indexes_set;
1007 $query_desc .= " $index_plus $operands[$i]";
1008 $query_cgi .= "&idx=$index" if $index;
1009 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1010 $previous_operand = 1;
1015 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1018 my $group_OR_limits;
1019 my $availability_limit;
1020 foreach my $this_limit (@limits) {
1021 if ( $this_limit =~ /available/ ) {
1023 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1025 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1026 $availability_limit .=
1027 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1028 $limit_cgi .= "&limit=available";
1032 # group_OR_limits, prefixed by mc-
1033 # OR every member of the group
1034 elsif ( $this_limit =~ /mc/ ) {
1035 $group_OR_limits .= " or " if $group_OR_limits;
1036 $limit_desc .= " or " if $group_OR_limits;
1037 $group_OR_limits .= "$this_limit";
1038 $limit_cgi .= "&limit=$this_limit";
1039 $limit_desc .= " $this_limit";
1042 # Regular old limits
1044 $limit .= " and " if $limit || $query;
1045 $limit .= "$this_limit";
1046 $limit_cgi .= "&limit=$this_limit";
1047 $limit_desc .= " $this_limit";
1050 if ($group_OR_limits) {
1051 $limit .= " and " if ( $query || $limit );
1052 $limit .= "($group_OR_limits)";
1054 if ($availability_limit) {
1055 $limit .= " and " if ( $query || $limit );
1056 $limit .= "($availability_limit)";
1059 # Normalize the query and limit strings
1062 for ( $query, $query_desc, $limit, $limit_desc ) {
1063 $_ =~ s/ / /g; # remove extra spaces
1064 $_ =~ s/^ //g; # remove any beginning spaces
1065 $_ =~ s/ $//g; # remove any ending spaces
1066 $_ =~ s/==/=/g; # remove double == from query
1068 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1070 for ($query_cgi,$simple_query) {
1073 # append the limit to the query
1074 $query .= " " . $limit;
1078 warn "QUERY:" . $query;
1079 warn "QUERY CGI:" . $query_cgi;
1080 warn "QUERY DESC:" . $query_desc;
1081 warn "LIMIT:" . $limit;
1082 warn "LIMIT CGI:" . $limit_cgi;
1083 warn "LIMIT DESC:" . $limit_desc;
1085 warn "Leave buildQuery";
1089 undef, $query, $simple_query, $query_cgi,
1090 $query_desc, $limit, $limit_cgi, $limit_desc,
1091 $stopwords_removed, $query_type
1095 =head2 searchResults
1097 Format results in a form suitable for passing to the template
1101 # IMO this subroutine is pretty messy still -- it's responsible for
1102 # building the HTML output for the template
1104 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1105 my $dbh = C4::Context->dbh;
1110 # add search-term highlighting via <span>s on the search terms
1111 my $span_terms_hashref;
1112 for my $span_term ( split( / /, $searchdesc ) ) {
1113 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1114 $span_terms_hashref->{$span_term}++;
1117 #Build branchnames hash
1119 #get branch information.....
1122 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1123 ; # FIXME : use C4::Koha::GetBranches
1125 while ( my $bdata = $bsth->fetchrow_hashref ) {
1126 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1131 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1134 while ( my $ldata = $lsch->fetchrow_hashref ) {
1135 $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1138 #Build itemtype hash
1139 #find itemtype & itemtype image
1143 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1146 while ( my $bdata = $bsth->fetchrow_hashref ) {
1147 $itemtypes{ $bdata->{'itemtype'} }->{description} =
1148 $bdata->{'description'};
1149 $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1150 $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
1151 $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1152 $bdata->{'notforloan'};
1155 #search item field code
1158 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1161 my ($itemtag) = $sth->fetchrow;
1163 # get notforloan authorised value list
1166 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1169 my ($notforloan_authorised_value) = $sth->fetchrow;
1171 ## find column names of items related to MARC
1172 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1174 my %subfieldstosearch;
1175 while ( ( my $column ) = $sth2->fetchrow ) {
1176 my ( $tagfield, $tagsubfield ) =
1177 &GetMarcFromKohaField( "items." . $column, "" );
1178 $subfieldstosearch{$column} = $tagsubfield;
1181 # handle which records to actually retrieve
1183 if ( $hits && $offset + $results_per_page <= $hits ) {
1184 $times = $offset + $results_per_page;
1190 # loop through all of the records we've retrieved
1191 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1193 $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1194 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1195 $oldbiblio->{result_number} = $i + 1;
1197 # add imageurl to itemtype if there is one
1198 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1199 $oldbiblio->{imageurl} =
1200 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1201 $oldbiblio->{description} =
1202 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1205 $oldbiblio->{imageurl} =
1206 getitemtypeimagesrc() . "/"
1207 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1208 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1209 $oldbiblio->{description} =
1210 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1213 # Build summary if there is one (the summary is defined in the itemtypes table)
1214 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1215 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1216 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1217 my @fields = $marcrecord->fields();
1218 foreach my $field (@fields) {
1219 my $tag = $field->tag();
1220 my $tagvalue = $field->as_string();
1222 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1223 unless ( $tag < 10 ) {
1224 my @subf = $field->subfields;
1225 for my $i ( 0 .. $#subf ) {
1226 my $subfieldcode = $subf[$i][0];
1227 my $subfieldvalue = $subf[$i][1];
1228 my $tagsubf = $tag . $subfieldcode;
1230 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1235 $summary =~ s/\[(.*?)]//g;
1236 $summary =~ s/\n/<br>/g;
1237 $oldbiblio->{summary} = $summary;
1240 # Add search-term highlighting to the whole record where they match using <span>s
1241 if (C4::Context->preference("OpacHighlightedWords")){
1242 my $searchhighlightblob;
1243 for my $highlight_field ( $marcrecord->fields ) {
1245 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1246 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1247 for my $subfield ($highlight_field->subfields()) {
1249 next if $subfield->[0] eq '9';
1250 my $field = $subfield->[1];
1251 for my $term ( keys %$span_terms_hashref ) {
1252 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1253 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1257 $searchhighlightblob .= $field . " ... " if $match;
1261 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1262 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1264 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1265 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1267 # Add search-term highlighting to the title, subtitle, etc. fields
1268 for my $term ( keys %$span_terms_hashref ) {
1269 my $old_term = $term;
1270 if ( length($term) > 3 ) {
1271 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1272 $oldbiblio->{'title'} =~
1273 s/$term/<span class=\"term\">$&<\/span>/gi;
1274 $oldbiblio->{'subtitle'} =~
1275 s/$term/<span class=\"term\">$&<\/span>/gi;
1276 $oldbiblio->{'author'} =~
1277 s/$term/<span class=\"term\">$&<\/span>/gi;
1278 $oldbiblio->{'publishercode'} =~
1279 s/$term/<span class=\"term\">$&<\/span>/gi;
1280 $oldbiblio->{'place'} =~
1281 s/$term/<span class=\"term\">$&<\/span>/gi;
1282 $oldbiblio->{'pages'} =~
1283 s/$term/<span class=\"term\">$&<\/span>/gi;
1284 $oldbiblio->{'notes'} =~
1285 s/$term/<span class=\"term\">$&<\/span>/gi;
1286 $oldbiblio->{'size'} =~
1287 s/$term/<span class=\"term\">$&<\/span>/gi;
1292 # surely there's a better way to handle this
1294 $toggle = "#ffffcc";
1299 $oldbiblio->{'toggle'} = $toggle;
1301 # Pull out the items fields
1302 my @fields = $marcrecord->field($itemtag);
1304 # Setting item statuses for display
1305 my @available_items_loop;
1306 my @onloan_items_loop;
1307 my @other_items_loop;
1309 my $available_items;
1313 my $ordered_count = 0;
1314 my $available_count = 0;
1315 my $onloan_count = 0;
1316 my $longoverdue_count = 0;
1317 my $other_count = 0;
1318 my $wthdrawn_count = 0;
1319 my $itemlost_count = 0;
1320 my $itembinding_count = 0;
1321 my $itemdamaged_count = 0;
1322 my $can_place_holds = 0;
1323 my $items_count = scalar(@fields);
1326 ( C4::Context->preference('maxItemsinSearchResults') )
1327 ? C4::Context->preference('maxItemsinSearchResults') - 1
1330 # loop through every item
1331 foreach my $field (@fields) {
1335 # populate the items hash
1336 foreach my $code ( keys %subfieldstosearch ) {
1337 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1340 # set item's branch name, use homebranch first, fall back to holdingbranch
1341 if ( $item->{'homebranch'} ) {
1342 $item->{'branchname'} = $branches{ $item->{homebranch} };
1346 elsif ( $item->{'holdingbranch'} ) {
1347 $item->{'branchname'} = $branches{ $item->{holdingbranch} };
1350 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1351 if ( $item->{onloan} ) {
1353 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{due_date} = format_date( $item->{onloan} );
1354 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{count}++ if $item->{'homebranch'};
1355 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{branchname} = $item->{'branchname'};
1356 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{location} = $locations{ $item->{location} };
1357 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{itemcallnumber} = $item->{itemcallnumber};
1358 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1359 # if something's checked out and lost, mark it as 'long overdue'
1360 if ( $item->{itemlost} ) {
1361 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1362 $longoverdue_count++;
1365 # can place holds as long as this item isn't lost
1367 $can_place_holds = 1;
1371 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1375 if ( $item->{notforloan} == -1 ) {
1379 # item is withdrawn, lost or damaged
1380 if ( $item->{wthdrawn}
1381 || $item->{itemlost}
1383 || $item->{notforloan} )
1385 $wthdrawn_count++ if $item->{wthdrawn};
1386 $itemlost_count++ if $item->{itemlost};
1387 $itemdamaged_count++ if $item->{damaged};
1388 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1391 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1392 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1393 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1394 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1395 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1396 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1397 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1398 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1399 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1404 $can_place_holds = 1;
1406 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1407 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1408 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1409 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1410 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1413 } # notforloan, item level and biblioitem level
1414 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1416 ( C4::Context->preference('maxItemsinSearchResults') )
1417 ? C4::Context->preference('maxItemsinSearchResults') - 1
1419 for my $key ( sort keys %$onloan_items ) {
1420 $onloanitemscount++;
1421 push @onloan_items_loop, $onloan_items->{$key}
1422 unless $onloanitemscount > $maxitems;
1424 for my $key ( sort keys %$other_items ) {
1426 push @other_items_loop, $other_items->{$key}
1427 unless $otheritemscount > $maxitems;
1429 for my $key ( sort keys %$available_items ) {
1430 $availableitemscount++;
1431 push @available_items_loop, $available_items->{$key}
1432 unless $availableitemscount > $maxitems;
1435 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1436 $can_place_holds = 0
1437 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1438 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1439 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1440 $oldbiblio->{items_count} = $items_count;
1441 $oldbiblio->{available_items_loop} = \@available_items_loop;
1442 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1443 $oldbiblio->{other_items_loop} = \@other_items_loop;
1444 $oldbiblio->{availablecount} = $available_count;
1445 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1446 $oldbiblio->{onloancount} = $onloan_count;
1447 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1448 $oldbiblio->{othercount} = $other_count;
1449 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1450 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1451 $oldbiblio->{itemlostcount} = $itemlost_count;
1452 $oldbiblio->{damagedcount} = $itemdamaged_count;
1453 $oldbiblio->{orderedcount} = $ordered_count;
1454 $oldbiblio->{isbn} =~
1455 s/-//g; # deleting - in isbn to enable amazon content
1456 push( @newresults, $oldbiblio );
1461 #----------------------------------------------------------------------
1463 # Non-Zebra GetRecords#
1464 #----------------------------------------------------------------------
1468 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1474 $query, $simple_query, $sort_by_ref, $servers_ref,
1475 $results_per_page, $offset, $expanded_facet, $branches,
1478 warn "query =$query" if $DEBUG;
1479 my $result = NZanalyse($query);
1480 warn "results =$result" if $DEBUG;
1482 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1488 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1489 the list is built from an inverted index in the nozebra SQL table
1490 note that title is here only for convenience : the sorting will be very fast when requested on title
1491 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1496 my ( $string, $server ) = @_;
1497 # warn "---------" if $DEBUG;
1498 warn " NZanalyse" if $DEBUG;
1499 # warn "---------" if $DEBUG;
1501 # $server contains biblioserver or authorities, depending on what we search on.
1502 #warn "querying : $string on $server";
1503 $server = 'biblioserver' unless $server;
1505 # if we have a ", replace the content to discard temporarily any and/or/not inside
1507 if ( $string =~ /"/ ) {
1508 $string =~ s/"(.*?)"/__X__/;
1510 warn "commacontent : $commacontent" if $DEBUG;
1513 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1514 # then, call again NZanalyse with $left and $right
1515 # (recursive until we find a leaf (=> something without and/or/not)
1516 # delete repeated operator... Would then go in infinite loop
1517 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1520 #process parenthesis before.
1521 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1524 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1526 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1528 my $leftresult = NZanalyse( $left, $server );
1530 my $rightresult = NZanalyse( $right, $server );
1532 # OK, we have the results for right and left part of the query
1533 # depending of operand, intersect, union or exclude both lists
1534 # to get a result list
1535 if ( $operator eq ' and ' ) {
1536 return NZoperatorAND($leftresult,$rightresult);
1538 elsif ( $operator eq ' or ' ) {
1540 # just merge the 2 strings
1541 return $leftresult . $rightresult;
1543 elsif ( $operator eq ' not ' ) {
1544 return NZoperatorNOT($leftresult,$rightresult);
1548 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1552 warn "string :" . $string if $DEBUG;
1553 $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1556 my $operator = lc($2); # FIXME: and/or/not are operators, not operands
1557 warn "no parenthesis. left : $left operator: $operator right: $right"
1560 # it's not a leaf, we have a and/or/not
1563 # reintroduce comma content if needed
1564 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1565 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1566 warn "node : $left / $operator / $right\n" if $DEBUG;
1567 my $leftresult = NZanalyse( $left, $server );
1568 my $rightresult = NZanalyse( $right, $server );
1569 warn " leftresult : $leftresult" if $DEBUG;
1570 warn " rightresult : $rightresult" if $DEBUG;
1571 # OK, we have the results for right and left part of the query
1572 # depending of operand, intersect, union or exclude both lists
1573 # to get a result list
1574 if ( $operator eq ' and ' ) {
1576 return NZoperatorAND($leftresult,$rightresult);
1578 elsif ( $operator eq ' or ' ) {
1580 # just merge the 2 strings
1581 return $leftresult . $rightresult;
1583 elsif ( $operator eq ' not ' ) {
1584 return NZoperatorNOT($leftresult,$rightresult);
1588 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1589 die "error : operand unknown : $operator for $string";
1592 # it's a leaf, do the real SQL query and return the result
1595 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1596 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1597 warn "leaf:$string" if $DEBUG;
1599 # parse the string in in operator/operand/value again
1600 $string =~ /(.*)(>=|<=)(.*)/;
1604 # warn "handling leaf... left:$left operator:$operator right:$right"
1606 unless ($operator) {
1607 $string =~ /(.*)(>|<|=)(.*)/;
1612 # "handling unless (operator)... left:$left operator:$operator right:$right"
1617 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1618 $left =~ s/[, ].*$//;
1620 # automatic replace for short operators
1621 $left = 'title' if $left =~ '^ti$';
1622 $left = 'author' if $left =~ '^au$';
1623 $left = 'publisher' if $left =~ '^pb$';
1624 $left = 'subject' if $left =~ '^su$';
1625 $left = 'koha-Auth-Number' if $left =~ '^an$';
1626 $left = 'keyword' if $left =~ '^kw$';
1627 warn "handling leaf... left:$left operator:$operator right:$right";
1628 if ( $operator && $left ne 'keyword' ) {
1630 #do a specific search
1631 my $dbh = C4::Context->dbh;
1632 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1635 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1637 warn "$left / $operator / $right\n";
1639 # split each word, query the DB and build the biblionumbers result
1640 #sanitizing leftpart
1641 $left =~ s/^\s+|\s+$//;
1642 foreach ( split / /, $right ) {
1644 $_ =~ s/^\s+|\s+$//;
1646 warn "EXECUTE : $server, $left, $_";
1647 $sth->execute( $server, $left, $_ )
1648 or warn "execute failed: $!";
1649 while ( my ( $line, $value ) = $sth->fetchrow ) {
1651 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1652 # otherwise, fill the result
1653 $biblionumbers .= $line
1654 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1655 warn "result : $value "
1656 . ( $right =~ /\d/ ) . "=="
1657 . ( $value =~ /\D/?$line:"" ); #= $line";
1660 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1663 $results = NZoperatorAND($biblionumbers,$results);
1666 $results = $biblionumbers;
1672 #do a complete search (all indexes), if index='kw' do complete search too.
1673 my $dbh = C4::Context->dbh;
1676 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1679 # split each word, query the DB and build the biblionumbers result
1680 foreach ( split / /, $string ) {
1681 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1682 warn "search on all indexes on $_" if $DEBUG;
1685 $sth->execute( $server, $_ );
1686 while ( my $line = $sth->fetchrow ) {
1687 $biblionumbers .= $line;
1690 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1692 $results = NZoperatorAND($biblionumbers,$results);
1695 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1696 $results = $biblionumbers;
1700 warn "return : $results for LEAF : $string" if $DEBUG;
1703 warn "---------" if $DEBUG;
1704 warn "Leave NZanalyse" if $DEBUG;
1705 warn "---------" if $DEBUG;
1709 my ($rightresult, $leftresult)=@_;
1711 my @leftresult = split /;/, $leftresult;
1712 warn " @leftresult / $rightresult \n" if $DEBUG;
1714 # my @rightresult = split /;/,$leftresult;
1717 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1718 # the result is stored twice, to have the same weight for AND than OR.
1719 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1720 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1721 foreach (@leftresult) {
1724 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1725 if ( $rightresult =~ /$value-(\d+);/ ) {
1726 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1728 "$value-$countvalue;$value-$countvalue;";
1731 warn " $finalresult \n" if $DEBUG;
1732 return $finalresult;
1736 my ($rightresult, $leftresult)=@_;
1737 return $rightresult.$leftresult;
1741 my ($rightresult, $leftresult)=@_;
1743 my @leftresult = split /;/, $leftresult;
1745 # my @rightresult = split /;/,$leftresult;
1747 foreach (@leftresult) {
1749 $value=$1 if $value=~m/(.*)-\d+$/;
1750 unless ($rightresult =~ "$value-") {
1751 $finalresult .= "$_;";
1754 return $finalresult;
1759 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1766 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1767 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1769 # order title asc by default
1770 # $ordering = '1=36 <i' unless $ordering;
1771 $results_per_page = 20 unless $results_per_page;
1772 $offset = 0 unless $offset;
1773 my $dbh = C4::Context->dbh;
1776 # order by POPULARITY
1778 if ( $ordering =~ /popularity/ ) {
1782 # popularity is not in MARC record, it's builded from a specific query
1784 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1785 foreach ( split /;/, $biblionumbers ) {
1786 my ( $biblionumber, $title ) = split /,/, $_;
1787 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1788 $sth->execute($biblionumber);
1789 my $popularity = $sth->fetchrow || 0;
1791 # hint : the key is popularity.title because we can have
1792 # many results with the same popularity. In this cas, sub-ordering is done by title
1793 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1794 # (un-frequent, I agree, but we won't forget anything that way ;-)
1795 $popularity{ sprintf( "%10d", $popularity ) . $title
1796 . $biblionumber } = $biblionumber;
1799 # sort the hash and return the same structure as GetRecords (Zebra querying)
1802 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1803 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1804 $result_hash->{'RECORDS'}[ $numbers++ ] =
1805 $result{ $popularity{$key} }->as_usmarc();
1808 else { # sort popularity ASC
1809 foreach my $key ( sort ( keys %popularity ) ) {
1810 $result_hash->{'RECORDS'}[ $numbers++ ] =
1811 $result{ $popularity{$key} }->as_usmarc();
1814 my $finalresult = ();
1815 $result_hash->{'hits'} = $numbers;
1816 $finalresult->{'biblioserver'} = $result_hash;
1817 return $finalresult;
1823 elsif ( $ordering =~ /author/ ) {
1825 foreach ( split /;/, $biblionumbers ) {
1826 my ( $biblionumber, $title ) = split /,/, $_;
1827 my $record = GetMarcBiblio($biblionumber);
1829 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1830 $author = $record->subfield( '200', 'f' );
1831 $author = $record->subfield( '700', 'a' ) unless $author;
1834 $author = $record->subfield( '100', 'a' );
1837 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1838 # and we don't want to get only 1 result for each of them !!!
1839 $result{ $author . $biblionumber } = $record;
1842 # sort the hash and return the same structure as GetRecords (Zebra querying)
1845 if ( $ordering eq 'author_za' ) { # sort by author desc
1846 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1847 $result_hash->{'RECORDS'}[ $numbers++ ] =
1848 $result{$key}->as_usmarc();
1851 else { # sort by author ASC
1852 foreach my $key ( sort ( keys %result ) ) {
1853 $result_hash->{'RECORDS'}[ $numbers++ ] =
1854 $result{$key}->as_usmarc();
1857 my $finalresult = ();
1858 $result_hash->{'hits'} = $numbers;
1859 $finalresult->{'biblioserver'} = $result_hash;
1860 return $finalresult;
1863 # ORDER BY callnumber
1866 elsif ( $ordering =~ /callnumber/ ) {
1868 foreach ( split /;/, $biblionumbers ) {
1869 my ( $biblionumber, $title ) = split /,/, $_;
1870 my $record = GetMarcBiblio($biblionumber);
1872 my ( $callnumber_tag, $callnumber_subfield ) =
1873 GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1874 ( $callnumber_tag, $callnumber_subfield ) =
1875 GetMarcFromKohaField('biblioitems.callnumber')
1876 unless $callnumber_tag;
1877 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1878 $callnumber = $record->subfield( '200', 'f' );
1881 $callnumber = $record->subfield( '100', 'a' );
1884 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1885 # and we don't want to get only 1 result for each of them !!!
1886 $result{ $callnumber . $biblionumber } = $record;
1889 # sort the hash and return the same structure as GetRecords (Zebra querying)
1892 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1893 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1894 $result_hash->{'RECORDS'}[ $numbers++ ] =
1895 $result{$key}->as_usmarc();
1898 else { # sort by title ASC
1899 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1900 $result_hash->{'RECORDS'}[ $numbers++ ] =
1901 $result{$key}->as_usmarc();
1904 my $finalresult = ();
1905 $result_hash->{'hits'} = $numbers;
1906 $finalresult->{'biblioserver'} = $result_hash;
1907 return $finalresult;
1909 elsif ( $ordering =~ /pubdate/ ) { #pub year
1911 foreach ( split /;/, $biblionumbers ) {
1912 my ( $biblionumber, $title ) = split /,/, $_;
1913 my $record = GetMarcBiblio($biblionumber);
1914 my ( $publicationyear_tag, $publicationyear_subfield ) =
1915 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1916 my $publicationyear =
1917 $record->subfield( $publicationyear_tag,
1918 $publicationyear_subfield );
1920 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1921 # and we don't want to get only 1 result for each of them !!!
1922 $result{ $publicationyear . $biblionumber } = $record;
1925 # sort the hash and return the same structure as GetRecords (Zebra querying)
1928 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1929 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1930 $result_hash->{'RECORDS'}[ $numbers++ ] =
1931 $result{$key}->as_usmarc();
1934 else { # sort by pub year ASC
1935 foreach my $key ( sort ( keys %result ) ) {
1936 $result_hash->{'RECORDS'}[ $numbers++ ] =
1937 $result{$key}->as_usmarc();
1940 my $finalresult = ();
1941 $result_hash->{'hits'} = $numbers;
1942 $finalresult->{'biblioserver'} = $result_hash;
1943 return $finalresult;
1949 elsif ( $ordering =~ /title/ ) {
1951 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1953 foreach ( split /;/, $biblionumbers ) {
1954 my ( $biblionumber, $title ) = split /,/, $_;
1956 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1957 # and we don't want to get only 1 result for each of them !!!
1958 # hint & speed improvement : we can order without reading the record
1959 # so order, and read records only for the requested page !
1960 $result{ $title . $biblionumber } = $biblionumber;
1963 # sort the hash and return the same structure as GetRecords (Zebra querying)
1966 if ( $ordering eq 'title_az' ) { # sort by title desc
1967 foreach my $key ( sort ( keys %result ) ) {
1968 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1971 else { # sort by title ASC
1972 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1973 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1977 # limit the $results_per_page to result size if it's more
1978 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
1980 # for the requested page, replace biblionumber by the complete record
1981 # speed improvement : avoid reading too much things
1983 my $counter = $offset ;
1984 $counter <= $offset + $results_per_page ;
1988 $result_hash->{'RECORDS'}[$counter] =
1989 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
1991 my $finalresult = ();
1992 $result_hash->{'hits'} = $numbers;
1993 $finalresult->{'biblioserver'} = $result_hash;
1994 return $finalresult;
2001 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2004 foreach ( split /;/, $biblionumbers ) {
2005 my ( $biblionumber, $title ) = split /,/, $_;
2006 $title =~ /(.*)-(\d)/;
2011 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2012 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2013 # biblio N has ranking = 6
2014 $count_ranking{$biblionumber} += $ranking;
2017 # build the result by "inverting" the count_ranking hash
2018 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2020 foreach ( keys %count_ranking ) {
2021 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2024 # sort the hash and return the same structure as GetRecords (Zebra querying)
2027 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2028 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2031 # limit the $results_per_page to result size if it's more
2032 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2034 # for the requested page, replace biblionumber by the complete record
2035 # speed improvement : avoid reading too much things
2037 my $counter = $offset ;
2038 $counter <= $offset + $results_per_page ;
2042 $result_hash->{'RECORDS'}[$counter] =
2043 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2044 if $result_hash->{'RECORDS'}[$counter];
2046 my $finalresult = ();
2047 $result_hash->{'hits'} = $numbers;
2048 $finalresult->{'biblioserver'} = $result_hash;
2049 return $finalresult;
2055 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2057 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2058 test parameter if set donot perform change to records in database.
2064 * $listbiblios is an array ref to marcrecords to be changed
2065 * $tagsubfield is the reference of the subfield to change.
2066 * $initvalue is the value to search the record for
2067 * $targetvalue is the value to set the subfield to
2068 * $test is to be set only not to perform changes in database.
2070 =item C<Output arg:>
2071 * $countchanged counts all the changes performed.
2072 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2074 =item C<usage in the script:>
2078 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2079 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2080 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2085 my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2088 my ( $tag, $subfield ) = ( $1, $2 )
2089 if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2090 if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2091 $tag = $tag . $subfield;
2094 my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber');
2095 my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2096 if ($tag eq $itemtag) {
2097 # do not allow the embedded item tag to be
2099 warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2102 foreach my $usmarc (@$listbiblios) {
2104 $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2108 # usmarc is not a valid usmarc May be a biblionumber
2109 # FIXME - sorry, please let's figure out whether
2110 # this function is to be passed a list of
2111 # record numbers or a list of MARC::Record
2112 # objects. The former is probably better
2113 # because the MARC records supplied by Zebra
2114 # may be not current.
2115 $record = GetMarcBiblio($usmarc);
2116 $biblionumber = $usmarc;
2119 if ( $bntag >= 010 ) {
2120 $biblionumber = $record->subfield( $bntag, $bnsubf );
2123 $biblionumber = $record->field($bntag)->data;
2127 #GetBiblionumber is to be written.
2128 #Could be replaced by TransformMarcToKoha (But Would be longer)
2129 if ( $record->field($tag) ) {
2131 foreach my $field ( $record->field($tag) ) {
2134 $field->delete_subfield(
2135 'code' => $subfield,
2136 'match' => qr($initvalue)
2142 $field->update( $subfield, $targetvalue )
2147 if ( $tag >= 010 ) {
2148 if ( $field->delete_field($field) ) {
2154 $field->data = $targetvalue
2155 if ( $field->data =~ qr($initvalue) );
2160 # warn $record->as_formatted;
2162 ModBiblio( $record, $biblionumber,
2163 GetFrameworkCode($biblionumber) )
2167 push @unmatched, $biblionumber;
2171 push @unmatched, $biblionumber;
2174 return ( $countmatched, \@unmatched );
2177 END { } # module clean-up code here (global destructor)
2184 Koha Developement team <info@koha.org>