3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField, GetBiblioData
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
34 # set the version for version checking
37 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
42 C4::Search - Functions for searching the Koha catalog.
46 See opac/opac-search.pl or catalogue/search.pl for example of usage
50 This module provides searching functions for Koha's bibliographic databases
66 # make all your functions, whether exported or not;
70 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
72 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
78 my $dbh = C4::Context->dbh;
79 my $result = TransformMarcToKoha( $dbh, $record, '' );
84 my ( $biblionumber, $title );
86 # search duplicate on ISBN, easy and fast..
88 if ( $result->{isbn} ) {
89 $result->{isbn} =~ s/\(.*$//;
90 $result->{isbn} =~ s/\s+$//;
91 $query = "isbn=$result->{isbn}";
94 $result->{title} =~ s /\\//g;
95 $result->{title} =~ s /\"//g;
96 $result->{title} =~ s /\(//g;
97 $result->{title} =~ s /\)//g;
99 # FIXME: instead of removing operators, could just do
100 # quotes around the value
101 $result->{title} =~ s/(and|or|not)//g;
102 $query = "ti,ext=$result->{title}";
103 $query .= " and itemtype=$result->{itemtype}"
104 if ( $result->{itemtype} );
105 if ( $result->{author} ) {
106 $result->{author} =~ s /\\//g;
107 $result->{author} =~ s /\"//g;
108 $result->{author} =~ s /\(//g;
109 $result->{author} =~ s /\)//g;
111 # remove valid operators
112 $result->{author} =~ s/(and|or|not)//g;
113 $query .= " and au,ext=$result->{author}";
117 # FIXME: add error handling
118 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
120 foreach my $possible_duplicate_record (@$searchresults) {
122 MARC::Record->new_from_usmarc($possible_duplicate_record);
123 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
125 # FIXME :: why 2 $biblionumber ?
127 push @results, $result->{'biblionumber'};
128 push @results, $result->{'title'};
136 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
138 This function provides a simple search API on the bibliographic catalog
144 * $query can be a simple keyword or a complete CCL query
145 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
146 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
147 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
152 * $error is a empty unless an error is detected
153 * \@results is an array of records.
154 * $total_hits is the number of hits that would have been returned with no limit
156 =item C<usage in the script:>
160 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
162 if (defined $error) {
163 $template->param(query_error => $error);
164 warn "error: ".$error;
165 output_html_with_http_headers $input, $cookie, $template->output;
169 my $hits = scalar @$marcresults;
172 for my $i (0..$hits) {
174 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
175 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
177 #build the hash for the template.
178 $resultsloop{title} = $biblio->{'title'};
179 $resultsloop{subtitle} = $biblio->{'subtitle'};
180 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
181 $resultsloop{author} = $biblio->{'author'};
182 $resultsloop{publishercode} = $biblio->{'publishercode'};
183 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
185 push @results, \%resultsloop;
188 $template->param(result=>\@results);
193 my ( $query, $offset, $max_results, $servers ) = @_;
195 if ( C4::Context->preference('NoZebra') ) {
196 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
199 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
200 return ( undef, $search_result, scalar($result->{hits}) );
203 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
204 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
210 return ( "No query entered", undef, undef ) unless $query;
212 # Initialize & Search Zebra
213 for ( my $i = 0 ; $i < @servers ; $i++ ) {
215 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
216 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
217 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
221 $zconns[$i]->errmsg() . " ("
222 . $zconns[$i]->errcode() . ") "
223 . $zconns[$i]->addinfo() . " "
224 . $zconns[$i]->diagset();
226 return ( $error, undef, undef ) if $zconns[$i]->errcode();
230 # caught a ZOOM::Exception
234 . $@->addinfo() . " "
237 return ( $error, undef, undef );
240 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
241 my $event = $zconns[ $i - 1 ]->last_event();
242 if ( $event == ZOOM::Event::ZEND ) {
244 my $first_record = defined( $offset ) ? $offset+1 : 1;
245 my $hits = $tmpresults[ $i - 1 ]->size();
246 $total_hits += $hits;
247 my $last_record = $hits;
248 if ( defined $max_results && $offset + $max_results < $hits ) {
249 $last_record = $offset + $max_results;
252 for my $j ( $first_record..$last_record ) {
253 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
254 push @results, $record;
259 foreach my $result (@tmpresults) {
262 foreach my $zoom_query (@zoom_queries) {
263 $zoom_query->destroy();
266 return ( undef, \@results, $total_hits );
272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
274 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
275 $results_per_page, $offset, $expanded_facet, $branches,
279 The all singing, all dancing, multi-server, asynchronous, scanning,
280 searching, record nabbing, facet-building
282 See verbse embedded documentation.
288 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
289 $results_per_page, $offset, $expanded_facet, $branches,
293 my @servers = @$servers_ref;
294 my @sort_by = @$sort_by_ref;
296 # Initialize variables for the ZOOM connection and results object
300 my $results_hashref = ();
302 # Initialize variables for the faceted results objects
303 my $facets_counter = ();
304 my $facets_info = ();
305 my $facets = getFacets();
307 my @facets_loop; # stores the ref to array of hashes for template facets loop
309 ### LOOP THROUGH THE SERVERS
310 for ( my $i = 0 ; $i < @servers ; $i++ ) {
311 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
313 # perform the search, create the results objects
314 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
315 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
317 #$query_to_use = $simple_query if $scan;
318 warn $simple_query if ( $scan and $DEBUG );
320 # Check if we've got a query_type defined, if so, use it
323 if ($query_type =~ /^ccl/) {
324 $query_to_use =~ s/\:/\=/g; # change : to = last minute (FIXME)
325 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
326 } elsif ($query_type =~ /^cql/) {
327 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
328 } elsif ($query_type =~ /^pqf/) {
329 $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
331 warn "Unknown query_type '$query_type'. Results undetermined.";
334 $results[$i] = $zconns[$i]->scan( new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
336 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
340 warn "WARNING: query problem with $query_to_use " . $@;
343 # Concatenate the sort_by limits and pass them to the results object
344 # Note: sort will override rank
346 foreach my $sort (@sort_by) {
347 if ( $sort eq "author_az" ) {
348 $sort_by .= "1=1003 <i ";
350 elsif ( $sort eq "author_za" ) {
351 $sort_by .= "1=1003 >i ";
353 elsif ( $sort eq "popularity_asc" ) {
354 $sort_by .= "1=9003 <i ";
356 elsif ( $sort eq "popularity_dsc" ) {
357 $sort_by .= "1=9003 >i ";
359 elsif ( $sort eq "call_number_asc" ) {
360 $sort_by .= "1=20 <i ";
362 elsif ( $sort eq "call_number_dsc" ) {
363 $sort_by .= "1=20 >i ";
365 elsif ( $sort eq "pubdate_asc" ) {
366 $sort_by .= "1=31 <i ";
368 elsif ( $sort eq "pubdate_dsc" ) {
369 $sort_by .= "1=31 >i ";
371 elsif ( $sort eq "acqdate_asc" ) {
372 $sort_by .= "1=32 <i ";
374 elsif ( $sort eq "acqdate_dsc" ) {
375 $sort_by .= "1=32 >i ";
377 elsif ( $sort eq "title_az" ) {
378 $sort_by .= "1=4 <i ";
380 elsif ( $sort eq "title_za" ) {
381 $sort_by .= "1=4 >i ";
384 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
388 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
389 warn "WARNING sort $sort_by failed";
392 } # finished looping through servers
394 # The big moment: asynchronously retrieve results from all servers
395 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
396 my $ev = $zconns[ $i - 1 ]->last_event();
397 if ( $ev == ZOOM::Event::ZEND ) {
398 next unless $results[ $i - 1 ];
399 my $size = $results[ $i - 1 ]->size();
403 # loop through the results
404 $results_hash->{'hits'} = $size;
406 if ( $offset + $results_per_page <= $size ) {
407 $times = $offset + $results_per_page;
412 for ( my $j = $offset ; $j < $times ; $j++ ) {
417 ## Check if it's an index scan
419 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
421 # here we create a minimal MARC record and hand it off to the
422 # template just like a normal result ... perhaps not ideal, but
424 my $tmprecord = MARC::Record->new();
425 $tmprecord->encoding('UTF-8');
429 # the minimal record in author/title (depending on MARC flavour)
430 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
431 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
432 $tmprecord->append_fields($tmptitle);
434 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
435 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
436 $tmprecord->append_fields($tmptitle);
437 $tmprecord->append_fields($tmpauthor);
439 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
444 $record = $results[ $i - 1 ]->record($j)->raw();
446 # warn "RECORD $j:".$record;
447 $results_hash->{'RECORDS'}[$j] = $record;
449 # Fill the facets while we're looping, but only for the biblioserver
450 $facet_record = MARC::Record->new_from_usmarc($record)
451 if $servers[ $i - 1 ] =~ /biblioserver/;
453 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
455 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
456 ($facets->[$k]) or next;
457 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
458 for my $field (@fields) {
459 my @subfields = $field->subfields();
460 for my $subfield (@subfields) {
461 my ( $code, $data ) = @$subfield;
462 ($code eq $facets->[$k]->{'subfield'}) or next;
463 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
466 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
467 $facets->[$k]->{'label_value'};
468 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
469 $facets->[$k]->{'expanded'};
474 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
477 # warn "connection ", $i-1, ": $size hits";
478 # warn $results[$i-1]->record(0)->render() if $size > 0;
481 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
483 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
484 keys %$facets_counter )
487 my $number_of_facets;
488 my @this_facets_array;
491 $facets_counter->{$link_value}->{$b}
492 <=> $facets_counter->{$link_value}->{$a}
493 } keys %{ $facets_counter->{$link_value} }
497 if ( ( $number_of_facets < 6 )
498 || ( $expanded_facet eq $link_value )
499 || ( $facets_info->{$link_value}->{'expanded'} ) )
502 # Sanitize the link value ), ( will cause errors with CCL,
503 my $facet_link_value = $one_facet;
504 $facet_link_value =~ s/(\(|\))/ /g;
506 # fix the length that will display in the label,
507 my $facet_label_value = $one_facet;
509 substr( $one_facet, 0, 20 ) . "..."
510 unless length($facet_label_value) <= 20;
512 # if it's a branch, label by the name, not the code,
513 if ( $link_value =~ /branch/ ) {
515 $branches->{$one_facet}->{'branchname'};
518 # but we're down with the whole label being in the link's title.
519 push @this_facets_array, {
520 facet_count => $facets_counter->{$link_value}->{$one_facet},
521 facet_label_value => $facet_label_value,
522 facet_title_value => $one_facet,
523 facet_link_value => $facet_link_value,
524 type_link_value => $link_value,
529 # handle expanded option
530 unless ( $facets_info->{$link_value}->{'expanded'} ) {
532 if ( ( $number_of_facets > 6 )
533 && ( $expanded_facet ne $link_value ) );
536 type_link_value => $link_value,
537 type_id => $link_value . "_id",
538 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
539 facets => \@this_facets_array,
540 expandable => $expandable,
541 expand => $link_value,
542 } unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
547 return ( undef, $results_hashref, \@facets_loop );
552 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
553 $results_per_page, $offset, $expanded_facet, $branches,
557 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
559 $paz->search($simple_query);
560 sleep 1; # FIXME: WHY?
563 my $results_hashref = {};
564 my $stats = XMLin($paz->stat);
565 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
567 # for a grouped search result, the number of hits
568 # is the number of groups returned; 'bib_hits' will have
569 # the total number of bibs.
570 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
571 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
573 HIT: foreach my $hit (@{ $results->{'hit'} }) {
574 my $recid = $hit->{recid}->[0];
576 my $work_title = $hit->{'md-work-title'}->[0];
578 if (exists $hit->{'md-work-author'}) {
579 $work_author = $hit->{'md-work-author'}->[0];
581 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
583 my $result_group = {};
584 $result_group->{'group_label'} = $group_label;
585 $result_group->{'group_merge_key'} = $recid;
588 if (exists $hit->{count}) {
589 $count = $hit->{count}->[0];
591 $result_group->{'group_count'} = $count;
593 for (my $i = 0; $i < $count; $i++) {
594 # FIXME -- may need to worry about diacritics here
595 my $rec = $paz->record($recid, $i);
596 push @{ $result_group->{'RECORDS'} }, $rec;
599 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
602 # pass through facets
603 my $termlist_xml = $paz->termlist('author,subject');
604 my $terms = XMLin($termlist_xml, forcearray => 1);
605 my @facets_loop = ();
606 #die Dumper($results);
607 # foreach my $list (sort keys %{ $terms->{'list'} }) {
609 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
611 # facet_label_value => $facet->{'name'}->[0],
614 # push @facets_loop, ( {
615 # type_label => $list,
616 # facets => \@facets,
620 return ( undef, $results_hashref, \@facets_loop );
624 sub _remove_stopwords {
625 my ( $operand, $index ) = @_;
626 my @stopwords_removed;
628 # phrase and exact-qualified indexes shouldn't have stopwords removed
629 if ( $index !~ m/phr|ext/ ) {
631 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
632 # we use IsAlpha unicode definition, to deal correctly with diacritics.
633 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
634 # is a stopword, we'd get "çon" and wouldn't find anything...
635 foreach ( keys %{ C4::Context->stopwords } ) {
636 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
638 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
640 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
641 $operand =~ s/^$_\P{IsAlpha}/ /gi;
642 $operand =~ s/\P{IsAlpha}$_$/ /gi;
643 $operand =~ s/$1//gi;
644 push @stopwords_removed, $_;
648 return ( $operand, \@stopwords_removed );
652 sub _detect_truncation {
653 my ( $operand, $index ) = @_;
654 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
657 my @wordlist = split( /\s/, $operand );
658 foreach my $word (@wordlist) {
659 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
660 push @rightlefttruncated, $word;
662 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
663 push @lefttruncated, $word;
665 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
666 push @righttruncated, $word;
668 elsif ( index( $word, "*" ) < 0 ) {
669 push @nontruncated, $word;
672 push @regexpr, $word;
676 \@nontruncated, \@righttruncated, \@lefttruncated,
677 \@rightlefttruncated, \@regexpr
682 sub _build_stemmed_operand {
686 # If operand contains a digit, it is almost certainly an identifier, and should
687 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
688 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
689 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
690 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
691 return $operand if $operand =~ /\d/;
693 # FIXME: the locale should be set based on the user's language and/or search choice
694 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
696 # FIXME: these should be stored in the db so the librarian can modify the behavior
697 $stemmer->add_exceptions(
704 my @words = split( / /, $operand );
705 my $stems = $stemmer->stem(@words);
706 for my $stem (@$stems) {
707 $stemmed_operand .= "$stem";
708 $stemmed_operand .= "?"
709 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
710 $stemmed_operand .= " ";
712 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
713 return $stemmed_operand;
717 sub _build_weighted_query {
719 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
720 # pretty well but could work much better if we had a smarter query parser
721 my ( $operand, $stemmed_operand, $index ) = @_;
722 my $stemming = C4::Context->preference("QueryStemming") || 0;
723 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
724 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
726 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
728 # Keyword, or, no index specified
729 if ( ( $index eq 'kw' ) || ( !$index ) ) {
731 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
732 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
733 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
734 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
735 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
736 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
737 if $fuzzy_enabled; # add fuzzy, word list
738 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
739 if ( $stemming and $stemmed_operand )
740 ; # add stemming, right truncation
741 $weighted_query .= " or wrdl,r9=\"$operand\"";
743 # embedded sorting: 0 a-z; 1 z-a
744 # $weighted_query .= ") or (sort1,aut=1";
747 # Barcode searches should skip this process
748 elsif ( $index eq 'bc' ) {
749 $weighted_query .= "bc=\"$operand\"";
752 # Authority-number searches should skip this process
753 elsif ( $index eq 'an' ) {
754 $weighted_query .= "an=\"$operand\"";
757 # If the index already has more than one qualifier, wrap the operand
758 # in quotes and pass it back (assumption is that the user knows what they
759 # are doing and won't appreciate us mucking up their query
760 elsif ( $index =~ ',' ) {
761 $weighted_query .= " $index=\"$operand\"";
764 #TODO: build better cases based on specific search indexes
766 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
767 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
768 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
770 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
773 $weighted_query .= "))"; # close rank specification
774 return $weighted_query;
780 $simple_query, $query_cgi,
782 $limit_cgi, $limit_desc,
783 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
785 Build queries and limits in CCL, CGI, Human,
786 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
788 See verbose embedded documentation.
794 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
796 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
799 my @operators = $operators ? @$operators : ();
800 my @indexes = $indexes ? @$indexes : ();
801 my @operands = $operands ? @$operands : ();
802 my @limits = $limits ? @$limits : ();
803 my @sort_by = $sort_by ? @$sort_by : ();
805 my $stemming = C4::Context->preference("QueryStemming") || 0;
806 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
807 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
808 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
809 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
811 # no stemming/weight/fuzzy in NoZebra
812 if ( C4::Context->preference("NoZebra") ) {
818 my $query = $operands[0];
819 my $simple_query = $operands[0];
821 # initialize the variables we're passing back
830 my $stopwords_removed; # flag to determine if stopwords have been removed
832 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
834 if ( $query =~ /^ccl=/ ) {
835 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
837 if ( $query =~ /^cql=/ ) {
838 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
840 if ( $query =~ /^pqf=/ ) {
841 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
844 # pass nested queries directly
845 # FIXME: need better handling of some of these variables in this case
846 if ( $query =~ /(\(|\))/ ) {
848 undef, $query, $simple_query, $query_cgi,
849 $query, $limit, $limit_cgi, $limit_desc,
850 $stopwords_removed, 'ccl'
854 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
855 # query operands and indexes and add stemming, truncation, field weighting, etc.
856 # Once we do so, we'll end up with a value in $query, just like if we had an
857 # incoming $query from the user
860 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
862 ; # a flag used to keep track if there was a previous query
863 # if there was, we can apply the current operator
865 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
867 # COMBINE OPERANDS, INDEXES AND OPERATORS
868 if ( $operands[$i] ) {
870 # A flag to determine whether or not to add the index to the query
873 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
874 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
877 $remove_stopwords = 0;
879 my $operand = $operands[$i];
880 my $index = $indexes[$i];
882 # Add index-specific attributes
883 # Date of Publication
884 if ( $index eq 'yr' ) {
885 $index .= ",st-numeric";
887 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
890 # Date of Acquisition
891 elsif ( $index eq 'acqdate' ) {
892 $index .= ",st-date-normalized";
894 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
896 # ISBN,ISSN,Standard Number, don't need special treatment
897 elsif ( $index eq 'nb' || $index eq 'ns' ) {
900 $stemming, $auto_truncation,
901 $weight_fields, $fuzzy_enabled,
903 ) = ( 0, 0, 0, 0, 0 );
906 # Set default structure attribute (word list)
908 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
909 $struct_attr = ",wrdl";
912 # Some helpful index variants
913 my $index_plus = $index . $struct_attr . ":" if $index;
914 my $index_plus_comma = $index . $struct_attr . "," if $index;
915 if ($auto_truncation){
916 # FIXME Auto Truncation is only valid for LTR languages
918 # use C4::Languages qw(regex_lang_subtags get_bidi);
919 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
920 # my $current_lang = regex_lang_subtags($lang);
922 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
923 $index_plus_comma .= "rtrn:";
927 if ($remove_stopwords) {
928 ( $operand, $stopwords_removed ) =
929 _remove_stopwords( $operand, $index );
930 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
931 warn "REMOVED STOPWORDS: @$stopwords_removed"
932 if ( $stopwords_removed && $DEBUG );
936 my $truncated_operand;
937 my( $nontruncated, $righttruncated, $lefttruncated,
938 $rightlefttruncated, $regexpr
939 ) = _detect_truncation( $operand, $index );
941 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
946 scalar(@$righttruncated) + scalar(@$lefttruncated) +
947 scalar(@$rightlefttruncated) > 0 )
950 # Don't field weight or add the index to the query, we do it here
952 undef $weight_fields;
953 my $previous_truncation_operand;
954 if (scalar @$nontruncated) {
955 $truncated_operand .= "$index_plus @$nontruncated ";
956 $previous_truncation_operand = 1;
958 if (scalar @$righttruncated) {
959 $truncated_operand .= "and " if $previous_truncation_operand;
960 $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
961 $previous_truncation_operand = 1;
963 if (scalar @$lefttruncated) {
964 $truncated_operand .= "and " if $previous_truncation_operand;
965 $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
966 $previous_truncation_operand = 1;
968 if (scalar @$rightlefttruncated) {
969 $truncated_operand .= "and " if $previous_truncation_operand;
970 $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
971 $previous_truncation_operand = 1;
974 $operand = $truncated_operand if $truncated_operand;
975 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
979 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
981 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
983 # Handle Field Weighting
984 my $weighted_operand;
985 if ($weight_fields) {
986 $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
987 $operand = $weighted_operand;
991 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
993 # If there's a previous operand, we need to add an operator
994 if ($previous_operand) {
996 # User-specified operator
997 if ( $operators[ $i - 1 ] ) {
998 $query .= " $operators[$i-1] ";
999 $query .= " $index_plus " unless $indexes_set;
1000 $query .= " $operand";
1001 $query_cgi .= "&op=$operators[$i-1]";
1002 $query_cgi .= "&idx=$index" if $index;
1003 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1005 " $operators[$i-1] $index_plus $operands[$i]";
1008 # Default operator is and
1011 $query .= "$index_plus " unless $indexes_set;
1012 $query .= "$operand";
1013 $query_cgi .= "&op=and&idx=$index" if $index;
1014 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1015 $query_desc .= " and $index_plus $operands[$i]";
1019 # There isn't a pervious operand, don't need an operator
1022 # Field-weighted queries already have indexes set
1023 $query .= " $index_plus " unless $indexes_set;
1025 $query_desc .= " $index_plus $operands[$i]";
1026 $query_cgi .= "&idx=$index" if $index;
1027 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1028 $previous_operand = 1;
1033 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1036 my $group_OR_limits;
1037 my $availability_limit;
1038 foreach my $this_limit (@limits) {
1039 if ( $this_limit =~ /available/ ) {
1041 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1043 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1044 $availability_limit .=
1045 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1046 $limit_cgi .= "&limit=available";
1050 # group_OR_limits, prefixed by mc-
1051 # OR every member of the group
1052 elsif ( $this_limit =~ /mc/ ) {
1053 $group_OR_limits .= " or " if $group_OR_limits;
1054 $limit_desc .= " or " if $group_OR_limits;
1055 $group_OR_limits .= "$this_limit";
1056 $limit_cgi .= "&limit=$this_limit";
1057 $limit_desc .= " $this_limit";
1060 # Regular old limits
1062 $limit .= " and " if $limit || $query;
1063 $limit .= "$this_limit";
1064 $limit_cgi .= "&limit=$this_limit";
1065 if ($this_limit =~ /^branch:(.+)/) {
1066 my $branchcode = $1;
1067 my $branchname = GetBranchName($branchcode);
1068 if (defined $branchname) {
1069 $limit_desc .= " branch:$branchname";
1071 $limit_desc .= " $this_limit";
1074 $limit_desc .= " $this_limit";
1078 if ($group_OR_limits) {
1079 $limit .= " and " if ( $query || $limit );
1080 $limit .= "($group_OR_limits)";
1082 if ($availability_limit) {
1083 $limit .= " and " if ( $query || $limit );
1084 $limit .= "($availability_limit)";
1087 # Normalize the query and limit strings
1090 for ( $query, $query_desc, $limit, $limit_desc ) {
1091 s/ / /g; # remove extra spaces
1092 s/^ //g; # remove any beginning spaces
1093 s/ $//g; # remove any ending spaces
1094 s/==/=/g; # remove double == from query
1096 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1098 for ($query_cgi,$simple_query) {
1101 # append the limit to the query
1102 $query .= " " . $limit;
1106 warn "QUERY:" . $query;
1107 warn "QUERY CGI:" . $query_cgi;
1108 warn "QUERY DESC:" . $query_desc;
1109 warn "LIMIT:" . $limit;
1110 warn "LIMIT CGI:" . $limit_cgi;
1111 warn "LIMIT DESC:" . $limit_desc;
1112 warn "---------\nLeave buildQuery\n---------";
1115 undef, $query, $simple_query, $query_cgi,
1116 $query_desc, $limit, $limit_cgi, $limit_desc,
1117 $stopwords_removed, $query_type
1121 =head2 searchResults
1123 Format results in a form suitable for passing to the template
1127 # IMO this subroutine is pretty messy still -- it's responsible for
1128 # building the HTML output for the template
1130 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1131 my $dbh = C4::Context->dbh;
1134 #Build branchnames hash
1136 #get branch information.....
1138 my $bsth =$dbh->prepare("SELECT branchcode,branchname FROM branches"); # FIXME : use C4::Branch::GetBranches
1140 while ( my $bdata = $bsth->fetchrow_hashref ) {
1141 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1143 # FIXME - We build an authorised values hash here, using the default framework
1144 # though it is possible to have different authvals for different fws.
1146 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1148 # get notforloan authorised value list (see $shelflocations FIXME)
1149 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1151 #Build itemtype hash
1152 #find itemtype & itemtype image
1156 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1159 while ( my $bdata = $bsth->fetchrow_hashref ) {
1160 foreach (qw(description imageurl summary notforloan)) {
1161 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1165 #search item field code
1168 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1171 my ($itemtag) = $sth->fetchrow;
1173 ## find column names of items related to MARC
1174 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1176 my %subfieldstosearch;
1177 while ( ( my $column ) = $sth2->fetchrow ) {
1178 my ( $tagfield, $tagsubfield ) =
1179 &GetMarcFromKohaField( "items." . $column, "" );
1180 $subfieldstosearch{$column} = $tagsubfield;
1183 # handle which records to actually retrieve
1185 if ( $hits && $offset + $results_per_page <= $hits ) {
1186 $times = $offset + $results_per_page;
1189 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1192 my $marcflavour = C4::Context->preference("marcflavour");
1193 # loop through all of the records we've retrieved
1194 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1195 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1196 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1197 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1198 $oldbiblio->{result_number} = $i + 1;
1200 # add imageurl to itemtype if there is one
1201 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1203 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1204 $oldbiblio->{normalized_upc} = GetNormalizedUPC( $marcrecord,$marcflavour);
1205 $oldbiblio->{normalized_ean} = GetNormalizedEAN( $marcrecord,$marcflavour);
1206 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1207 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1208 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1210 # edition information, if any
1211 $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1212 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1213 # Build summary if there is one (the summary is defined in the itemtypes table)
1214 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1215 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1216 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1217 my @fields = $marcrecord->fields();
1218 foreach my $field (@fields) {
1219 my $tag = $field->tag();
1220 my $tagvalue = $field->as_string();
1222 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1223 unless ( $tag < 10 ) {
1224 my @subf = $field->subfields;
1225 for my $i ( 0 .. $#subf ) {
1226 my $subfieldcode = $subf[$i][0];
1227 my $subfieldvalue = $subf[$i][1];
1228 my $tagsubf = $tag . $subfieldcode;
1230 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1235 $summary =~ s/\[(.*?)]//g;
1236 $summary =~ s/\n/<br\/>/g;
1237 $oldbiblio->{summary} = $summary;
1240 # Pull out the items fields
1241 my @fields = $marcrecord->field($itemtag);
1243 # Setting item statuses for display
1244 my @available_items_loop;
1245 my @onloan_items_loop;
1246 my @other_items_loop;
1248 my $available_items;
1252 my $ordered_count = 0;
1253 my $available_count = 0;
1254 my $onloan_count = 0;
1255 my $longoverdue_count = 0;
1256 my $other_count = 0;
1257 my $wthdrawn_count = 0;
1258 my $itemlost_count = 0;
1259 my $itembinding_count = 0;
1260 my $itemdamaged_count = 0;
1261 my $item_in_transit_count = 0;
1262 my $can_place_holds = 0;
1263 my $items_count = scalar(@fields);
1265 ( C4::Context->preference('maxItemsinSearchResults') )
1266 ? C4::Context->preference('maxItemsinSearchResults') - 1
1269 # loop through every item
1270 foreach my $field (@fields) {
1273 # populate the items hash
1274 foreach my $code ( keys %subfieldstosearch ) {
1275 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1277 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1278 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1279 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1280 if ($item->{$hbranch}) {
1281 $item->{'branchname'} = $branches{$item->{$hbranch}};
1283 elsif ($item->{$otherbranch}) { # Last resort
1284 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1287 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1288 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1289 if ( $item->{onloan} ) {
1291 my $key = $prefix . $item->{onloan} . $item->{barcode};
1292 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1293 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1294 $onloan_items->{$key}->{branchname} = $item->{branchname};
1295 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1296 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1297 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1298 # if something's checked out and lost, mark it as 'long overdue'
1299 if ( $item->{itemlost} ) {
1300 $onloan_items->{$prefix}->{longoverdue}++;
1301 $longoverdue_count++;
1302 } else { # can place holds as long as item isn't lost
1303 $can_place_holds = 1;
1307 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1311 if ( $item->{notforloan} == -1 ) {
1315 # is item in transit?
1316 my $transfertwhen = '';
1317 my ($transfertfrom, $transfertto);
1319 unless ($item->{wthdrawn}
1320 || $item->{itemlost}
1322 || $item->{notforloan}
1323 || $items_count > 20) {
1325 # A couple heuristics to limit how many times
1326 # we query the database for item transfer information, sacrificing
1327 # accuracy in some cases for speed;
1329 # 1. don't query if item has one of the other statuses
1330 # 2. don't check transit status if the bib has
1331 # more than 20 items
1333 # FIXME: to avoid having the query the database like this, and to make
1334 # the in transit status count as unavailable for search limiting,
1335 # should map transit status to record indexed in Zebra.
1337 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1340 # item is withdrawn, lost or damaged
1341 if ( $item->{wthdrawn}
1342 || $item->{itemlost}
1344 || $item->{notforloan}
1345 || ($transfertwhen ne ''))
1347 $wthdrawn_count++ if $item->{wthdrawn};
1348 $itemlost_count++ if $item->{itemlost};
1349 $itemdamaged_count++ if $item->{damaged};
1350 $item_in_transit_count++ if $transfertwhen ne '';
1351 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1354 my $key = $prefix . $item->{status};
1355 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1356 $other_items->{$key}->{$_} = $item->{$_};
1358 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1359 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1360 $other_items->{$key}->{count}++ if $item->{$hbranch};
1361 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1362 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1366 $can_place_holds = 1;
1368 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1369 foreach (qw(branchname itemcallnumber)) {
1370 $available_items->{$prefix}->{$_} = $item->{$_};
1372 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1373 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1376 } # notforloan, item level and biblioitem level
1377 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1379 ( C4::Context->preference('maxItemsinSearchResults') )
1380 ? C4::Context->preference('maxItemsinSearchResults') - 1
1382 for my $key ( sort keys %$onloan_items ) {
1383 (++$onloanitemscount > $maxitems) and last;
1384 push @onloan_items_loop, $onloan_items->{$key};
1386 for my $key ( sort keys %$other_items ) {
1387 (++$otheritemscount > $maxitems) and last;
1388 push @other_items_loop, $other_items->{$key};
1390 for my $key ( sort keys %$available_items ) {
1391 (++$availableitemscount > $maxitems) and last;
1392 push @available_items_loop, $available_items->{$key}
1395 # XSLT processing of some stuff
1396 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1397 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1398 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1401 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1402 $can_place_holds = 0
1403 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1404 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1405 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1406 $oldbiblio->{items_count} = $items_count;
1407 $oldbiblio->{available_items_loop} = \@available_items_loop;
1408 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1409 $oldbiblio->{other_items_loop} = \@other_items_loop;
1410 $oldbiblio->{availablecount} = $available_count;
1411 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1412 $oldbiblio->{onloancount} = $onloan_count;
1413 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1414 $oldbiblio->{othercount} = $other_count;
1415 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1416 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1417 $oldbiblio->{itemlostcount} = $itemlost_count;
1418 $oldbiblio->{damagedcount} = $itemdamaged_count;
1419 $oldbiblio->{intransitcount} = $item_in_transit_count;
1420 $oldbiblio->{orderedcount} = $ordered_count;
1421 push( @newresults, $oldbiblio );
1426 #----------------------------------------------------------------------
1428 # Non-Zebra GetRecords#
1429 #----------------------------------------------------------------------
1433 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1439 $query, $simple_query, $sort_by_ref, $servers_ref,
1440 $results_per_page, $offset, $expanded_facet, $branches,
1443 warn "query =$query" if $DEBUG;
1444 my $result = NZanalyse($query);
1445 warn "results =$result" if $DEBUG;
1447 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1453 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1454 the list is built from an inverted index in the nozebra SQL table
1455 note that title is here only for convenience : the sorting will be very fast when requested on title
1456 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1461 my ( $string, $server ) = @_;
1462 # warn "---------" if $DEBUG;
1463 warn " NZanalyse" if $DEBUG;
1464 # warn "---------" if $DEBUG;
1466 # $server contains biblioserver or authorities, depending on what we search on.
1467 #warn "querying : $string on $server";
1468 $server = 'biblioserver' unless $server;
1470 # if we have a ", replace the content to discard temporarily any and/or/not inside
1472 if ( $string =~ /"/ ) {
1473 $string =~ s/"(.*?)"/__X__/;
1475 warn "commacontent : $commacontent" if $DEBUG;
1478 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1479 # then, call again NZanalyse with $left and $right
1480 # (recursive until we find a leaf (=> something without and/or/not)
1481 # delete repeated operator... Would then go in infinite loop
1482 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1485 #process parenthesis before.
1486 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1489 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1491 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1493 my $leftresult = NZanalyse( $left, $server );
1495 my $rightresult = NZanalyse( $right, $server );
1497 # OK, we have the results for right and left part of the query
1498 # depending of operand, intersect, union or exclude both lists
1499 # to get a result list
1500 if ( $operator eq ' and ' ) {
1501 return NZoperatorAND($leftresult,$rightresult);
1503 elsif ( $operator eq ' or ' ) {
1505 # just merge the 2 strings
1506 return $leftresult . $rightresult;
1508 elsif ( $operator eq ' not ' ) {
1509 return NZoperatorNOT($leftresult,$rightresult);
1513 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1517 warn "string :" . $string if $DEBUG;
1521 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1524 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1526 warn "no parenthesis. left : $left operator: $operator right: $right"
1529 # it's not a leaf, we have a and/or/not
1532 # reintroduce comma content if needed
1533 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1534 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1535 warn "node : $left / $operator / $right\n" if $DEBUG;
1536 my $leftresult = NZanalyse( $left, $server );
1537 my $rightresult = NZanalyse( $right, $server );
1538 warn " leftresult : $leftresult" if $DEBUG;
1539 warn " rightresult : $rightresult" if $DEBUG;
1540 # OK, we have the results for right and left part of the query
1541 # depending of operand, intersect, union or exclude both lists
1542 # to get a result list
1543 if ( $operator eq ' and ' ) {
1545 return NZoperatorAND($leftresult,$rightresult);
1547 elsif ( $operator eq ' or ' ) {
1549 # just merge the 2 strings
1550 return $leftresult . $rightresult;
1552 elsif ( $operator eq ' not ' ) {
1553 return NZoperatorNOT($leftresult,$rightresult);
1557 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1558 die "error : operand unknown : $operator for $string";
1561 # it's a leaf, do the real SQL query and return the result
1564 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1565 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1566 #remove trailing blank at the beginning
1568 warn "leaf:$string" if $DEBUG;
1570 # parse the string in in operator/operand/value again
1574 if ($string =~ /(.*)(>=|<=)(.*)/) {
1581 # warn "handling leaf... left:$left operator:$operator right:$right"
1583 unless ($operator) {
1584 if ($string =~ /(.*)(>|<|=)(.*)/) {
1589 "handling unless (operator)... left:$left operator:$operator right:$right"
1597 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1600 # automatic replace for short operators
1601 $left = 'title' if $left =~ '^ti$';
1602 $left = 'author' if $left =~ '^au$';
1603 $left = 'publisher' if $left =~ '^pb$';
1604 $left = 'subject' if $left =~ '^su$';
1605 $left = 'koha-Auth-Number' if $left =~ '^an$';
1606 $left = 'keyword' if $left =~ '^kw$';
1607 $left = 'itemtype' if $left =~ '^mc$'; # Fix for Bug 2599 - Search limits not working for NoZebra
1608 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1609 my $dbh = C4::Context->dbh;
1610 if ( $operator && $left ne 'keyword' ) {
1611 #do a specific search
1612 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1613 my $sth = $dbh->prepare(
1614 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1616 warn "$left / $operator / $right\n" if $DEBUG;
1618 # split each word, query the DB and build the biblionumbers result
1619 #sanitizing leftpart
1620 $left =~ s/^\s+|\s+$//;
1621 foreach ( split / /, $right ) {
1623 $_ =~ s/^\s+|\s+$//;
1625 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1626 $sth->execute( $server, $left, $_ )
1627 or warn "execute failed: $!";
1628 while ( my ( $line, $value ) = $sth->fetchrow ) {
1630 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1631 # otherwise, fill the result
1632 $biblionumbers .= $line
1633 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1634 warn "result : $value "
1635 . ( $right =~ /\d/ ) . "=="
1636 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1639 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1641 warn "NZAND" if $DEBUG;
1642 $results = NZoperatorAND($biblionumbers,$results);
1644 $results = $biblionumbers;
1649 #do a complete search (all indexes), if index='kw' do complete search too.
1650 my $sth = $dbh->prepare(
1651 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1654 # split each word, query the DB and build the biblionumbers result
1655 foreach ( split / /, $string ) {
1656 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1657 warn "search on all indexes on $_" if $DEBUG;
1660 $sth->execute( $server, $_ );
1661 while ( my $line = $sth->fetchrow ) {
1662 $biblionumbers .= $line;
1665 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1667 $results = NZoperatorAND($biblionumbers,$results);
1670 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1671 $results = $biblionumbers;
1675 warn "return : $results for LEAF : $string" if $DEBUG;
1678 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1682 my ($rightresult, $leftresult)=@_;
1684 my @leftresult = split /;/, $leftresult;
1685 warn " @leftresult / $rightresult \n" if $DEBUG;
1687 # my @rightresult = split /;/,$leftresult;
1690 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1691 # the result is stored twice, to have the same weight for AND than OR.
1692 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1693 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1694 foreach (@leftresult) {
1697 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1698 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1699 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1701 "$value-$countvalue;$value-$countvalue;";
1704 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1705 return $finalresult;
1709 my ($rightresult, $leftresult)=@_;
1710 return $rightresult.$leftresult;
1714 my ($leftresult, $rightresult)=@_;
1716 my @leftresult = split /;/, $leftresult;
1718 # my @rightresult = split /;/,$leftresult;
1720 foreach (@leftresult) {
1722 $value=$1 if $value=~m/(.*)-\d+$/;
1723 unless ($rightresult =~ "$value-") {
1724 $finalresult .= "$_;";
1727 return $finalresult;
1732 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1739 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1740 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1742 # order title asc by default
1743 # $ordering = '1=36 <i' unless $ordering;
1744 $results_per_page = 20 unless $results_per_page;
1745 $offset = 0 unless $offset;
1746 my $dbh = C4::Context->dbh;
1749 # order by POPULARITY
1751 if ( $ordering =~ /popularity/ ) {
1755 # popularity is not in MARC record, it's builded from a specific query
1757 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1758 foreach ( split /;/, $biblionumbers ) {
1759 my ( $biblionumber, $title ) = split /,/, $_;
1760 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1761 $sth->execute($biblionumber);
1762 my $popularity = $sth->fetchrow || 0;
1764 # hint : the key is popularity.title because we can have
1765 # many results with the same popularity. In this case, sub-ordering is done by title
1766 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1767 # (un-frequent, I agree, but we won't forget anything that way ;-)
1768 $popularity{ sprintf( "%10d", $popularity ) . $title
1769 . $biblionumber } = $biblionumber;
1772 # sort the hash and return the same structure as GetRecords (Zebra querying)
1775 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1776 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1777 $result_hash->{'RECORDS'}[ $numbers++ ] =
1778 $result{ $popularity{$key} }->as_usmarc();
1781 else { # sort popularity ASC
1782 foreach my $key ( sort ( keys %popularity ) ) {
1783 $result_hash->{'RECORDS'}[ $numbers++ ] =
1784 $result{ $popularity{$key} }->as_usmarc();
1787 my $finalresult = ();
1788 $result_hash->{'hits'} = $numbers;
1789 $finalresult->{'biblioserver'} = $result_hash;
1790 return $finalresult;
1796 elsif ( $ordering =~ /author/ ) {
1798 foreach ( split /;/, $biblionumbers ) {
1799 my ( $biblionumber, $title ) = split /,/, $_;
1800 my $record = GetMarcBiblio($biblionumber);
1802 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1803 $author = $record->subfield( '200', 'f' );
1804 $author = $record->subfield( '700', 'a' ) unless $author;
1807 $author = $record->subfield( '100', 'a' );
1810 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1811 # and we don't want to get only 1 result for each of them !!!
1812 $result{ $author . $biblionumber } = $record;
1815 # sort the hash and return the same structure as GetRecords (Zebra querying)
1818 if ( $ordering eq 'author_za' ) { # sort by author desc
1819 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1820 $result_hash->{'RECORDS'}[ $numbers++ ] =
1821 $result{$key}->as_usmarc();
1824 else { # sort by author ASC
1825 foreach my $key ( sort ( keys %result ) ) {
1826 $result_hash->{'RECORDS'}[ $numbers++ ] =
1827 $result{$key}->as_usmarc();
1830 my $finalresult = ();
1831 $result_hash->{'hits'} = $numbers;
1832 $finalresult->{'biblioserver'} = $result_hash;
1833 return $finalresult;
1836 # ORDER BY callnumber
1839 elsif ( $ordering =~ /callnumber/ ) {
1841 foreach ( split /;/, $biblionumbers ) {
1842 my ( $biblionumber, $title ) = split /,/, $_;
1843 my $record = GetMarcBiblio($biblionumber);
1845 my $frameworkcode = GetFrameworkCode($biblionumber);
1846 my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField( 'items.itemcallnumber', $frameworkcode);
1847 ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
1848 unless $callnumber_tag;
1849 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1850 $callnumber = $record->subfield( '200', 'f' );
1852 $callnumber = $record->subfield( '100', 'a' );
1855 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1856 # and we don't want to get only 1 result for each of them !!!
1857 $result{ $callnumber . $biblionumber } = $record;
1860 # sort the hash and return the same structure as GetRecords (Zebra querying)
1863 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1864 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1865 $result_hash->{'RECORDS'}[ $numbers++ ] =
1866 $result{$key}->as_usmarc();
1869 else { # sort by title ASC
1870 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1871 $result_hash->{'RECORDS'}[ $numbers++ ] =
1872 $result{$key}->as_usmarc();
1875 my $finalresult = ();
1876 $result_hash->{'hits'} = $numbers;
1877 $finalresult->{'biblioserver'} = $result_hash;
1878 return $finalresult;
1880 elsif ( $ordering =~ /pubdate/ ) { #pub year
1882 foreach ( split /;/, $biblionumbers ) {
1883 my ( $biblionumber, $title ) = split /,/, $_;
1884 my $record = GetMarcBiblio($biblionumber);
1885 my ( $publicationyear_tag, $publicationyear_subfield ) =
1886 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1887 my $publicationyear =
1888 $record->subfield( $publicationyear_tag,
1889 $publicationyear_subfield );
1891 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1892 # and we don't want to get only 1 result for each of them !!!
1893 $result{ $publicationyear . $biblionumber } = $record;
1896 # sort the hash and return the same structure as GetRecords (Zebra querying)
1899 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1900 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1901 $result_hash->{'RECORDS'}[ $numbers++ ] =
1902 $result{$key}->as_usmarc();
1905 else { # sort by pub year ASC
1906 foreach my $key ( sort ( keys %result ) ) {
1907 $result_hash->{'RECORDS'}[ $numbers++ ] =
1908 $result{$key}->as_usmarc();
1911 my $finalresult = ();
1912 $result_hash->{'hits'} = $numbers;
1913 $finalresult->{'biblioserver'} = $result_hash;
1914 return $finalresult;
1920 elsif ( $ordering =~ /title/ ) {
1922 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1924 foreach ( split /;/, $biblionumbers ) {
1925 my ( $biblionumber, $title ) = split /,/, $_;
1927 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1928 # and we don't want to get only 1 result for each of them !!!
1929 # hint & speed improvement : we can order without reading the record
1930 # so order, and read records only for the requested page !
1931 $result{ $title . $biblionumber } = $biblionumber;
1934 # sort the hash and return the same structure as GetRecords (Zebra querying)
1937 if ( $ordering eq 'title_az' ) { # sort by title desc
1938 foreach my $key ( sort ( keys %result ) ) {
1939 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1942 else { # sort by title ASC
1943 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1944 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1948 # limit the $results_per_page to result size if it's more
1949 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
1951 # for the requested page, replace biblionumber by the complete record
1952 # speed improvement : avoid reading too much things
1954 my $counter = $offset ;
1955 $counter <= $offset + $results_per_page ;
1959 $result_hash->{'RECORDS'}[$counter] =
1960 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
1962 my $finalresult = ();
1963 $result_hash->{'hits'} = $numbers;
1964 $finalresult->{'biblioserver'} = $result_hash;
1965 return $finalresult;
1972 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
1975 foreach ( split /;/, $biblionumbers ) {
1976 my ( $biblionumber, $title ) = split /,/, $_;
1977 $title =~ /(.*)-(\d)/;
1982 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
1983 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
1984 # biblio N has ranking = 6
1985 $count_ranking{$biblionumber} += $ranking;
1988 # build the result by "inverting" the count_ranking hash
1989 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
1991 foreach ( keys %count_ranking ) {
1992 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
1995 # sort the hash and return the same structure as GetRecords (Zebra querying)
1998 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1999 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2002 # limit the $results_per_page to result size if it's more
2003 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2005 # for the requested page, replace biblionumber by the complete record
2006 # speed improvement : avoid reading too much things
2008 my $counter = $offset ;
2009 $counter <= $offset + $results_per_page ;
2013 $result_hash->{'RECORDS'}[$counter] =
2014 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2015 if $result_hash->{'RECORDS'}[$counter];
2017 my $finalresult = ();
2018 $result_hash->{'hits'} = $numbers;
2019 $finalresult->{'biblioserver'} = $result_hash;
2020 return $finalresult;
2024 =head2 enabled_staff_search_views
2026 %hash = enabled_staff_search_views()
2028 This function returns a hash that contains three flags obtained from the system
2029 preferences, used to determine whether a particular staff search results view
2034 =item C<Output arg:>
2036 * $hash{can_view_MARC} is true only if the MARC view is enabled
2037 * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2038 * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2040 =item C<usage in the script:>
2044 $template->param ( C4::Search::enabled_staff_search_views );
2048 sub enabled_staff_search_views
2051 can_view_MARC => C4::Context->preference('viewMARC'), # 1 if the staff search allows the MARC view
2052 can_view_ISBD => C4::Context->preference('viewISBD'), # 1 if the staff search allows the ISBD view
2053 can_view_labeledMARC => C4::Context->preference('viewLabeledMARC'), # 1 if the staff search allows the Labeled MARC view
2058 =head2 z3950_search_args
2060 $arrayref = z3950_search_args($matchpoints)
2062 This function returns an array reference that contains the search parameters to be
2063 passed to the Z39.50 search script (z3950_search.pl). The array elements
2064 are hash refs whose keys are name, value and encvalue, and whose values are the
2065 name of a search parameter, the value of that search parameter and the URL encoded
2066 value of that parameter.
2068 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2070 The search parameter values are obtained from the bibliographic record whose
2071 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2073 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2074 a general purpose search argument. In this case, the returned array contains only
2075 entry: the key is 'title' and the value and encvalue are derived from $matchpoints.
2077 If a search parameter value is undefined or empty, it is not included in the returned
2080 The returned array reference may be passed directly to the template parameters.
2084 =item C<Output arg:>
2086 * $array containing hash refs as described above
2088 =item C<usage in the script:>
2092 $data = Biblio::GetBiblioData($bibno);
2093 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2097 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2101 sub z3950_search_args {
2103 $bibrec = { title => $bibrec } if !ref $bibrec;
2105 for my $field (qw/ lccn isbn issn title author dewey subject /)
2107 my $encvalue = URI::Escape::uri_escape_utf8($bibrec->{$field});
2108 push @$array, { name=>$field, value=>$bibrec->{$field}, encvalue=>$encvalue } if defined $bibrec->{$field};
2114 END { } # module clean-up code here (global destructor)
2121 Koha Developement team <info@koha.org>