3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Dates qw(format_date);
26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
28 # set the version for version checking
31 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
36 C4::Search - Functions for searching the Koha catalog.
40 See opac/opac-search.pl or catalogue/search.pl for example of usage
44 This module provides searching functions for Koha's bibliographic databases
62 # make all your functions, whether exported or not;
64 =head2 findseealso($dbh,$fields);
66 C<$dbh> is a link to the DB handler.
69 my $dbh =C4::Context->dbh;
71 C<$fields> is a reference to the fields array
73 This function modifies the @$fields array and adds related fields to search on.
75 FIXME: this function is probably deprecated in Koha 3
80 my ( $dbh, $fields ) = @_;
81 my $tagslib = GetMarcStructure(1);
82 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
83 my ($tag) = substr( @$fields[$i], 1, 3 );
84 my ($subfield) = substr( @$fields[$i], 4, 1 );
85 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
86 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
100 my $dbh = C4::Context->dbh;
101 my $result = TransformMarcToKoha( $dbh, $record, '' );
106 my ( $biblionumber, $title );
108 # search duplicate on ISBN, easy and fast..
109 # ... normalize first
110 if ( $result->{isbn} ) {
111 $result->{isbn} =~ s/\(.*$//;
112 $result->{isbn} =~ s/\s+$//;
113 $query = "isbn=$result->{isbn}";
116 $result->{title} =~ s /\\//g;
117 $result->{title} =~ s /\"//g;
118 $result->{title} =~ s /\(//g;
119 $result->{title} =~ s /\)//g;
121 # FIXME: instead of removing operators, could just do
122 # quotes around the value
123 $result->{title} =~ s/(and|or|not)//g;
124 $query = "ti,ext=$result->{title}";
125 $query .= " and itemtype=$result->{itemtype}"
126 if ( $result->{itemtype} );
127 if ( $result->{author} ) {
128 $result->{author} =~ s /\\//g;
129 $result->{author} =~ s /\"//g;
130 $result->{author} =~ s /\(//g;
131 $result->{author} =~ s /\)//g;
133 # remove valid operators
134 $result->{author} =~ s/(and|or|not)//g;
135 $query .= " and au,ext=$result->{author}";
139 # FIXME: add error handling
140 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
142 foreach my $possible_duplicate_record (@$searchresults) {
144 MARC::Record->new_from_usmarc($possible_duplicate_record);
145 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
147 # FIXME :: why 2 $biblionumber ?
149 push @results, $result->{'biblionumber'};
150 push @results, $result->{'title'};
158 ($error,$results) = SimpleSearch($query,@servers);
160 This function provides a simple search API on the bibliographic catalog
166 * $query can be a simple keyword or a complete CCL query
167 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
170 * $error is undef unless an error is detected
171 * \@results is an array of records.
173 =item C<usage in the script:>
177 my ($error, $marcresults) = SimpleSearch($query);
179 if (defined $error) {
180 $template->param(query_error => $error);
181 warn "error: ".$error;
182 output_html_with_http_headers $input, $cookie, $template->output;
186 my $hits = scalar @$marcresults;
189 for(my $i=0;$i<$hits;$i++) {
191 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
192 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
194 #build the hash for the template.
195 $resultsloop{highlight} = ($i % 2)?(1):(0);
196 $resultsloop{title} = $biblio->{'title'};
197 $resultsloop{subtitle} = $biblio->{'subtitle'};
198 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
199 $resultsloop{author} = $biblio->{'author'};
200 $resultsloop{publishercode} = $biblio->{'publishercode'};
201 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
203 push @results, \%resultsloop;
206 $template->param(result=>\@results);
212 if ( C4::Context->preference('NoZebra') ) {
213 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
216 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
217 return ( undef, $search_result );
224 return ( "No query entered", undef ) unless $query;
226 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
227 @servers = ("biblioserver") unless @servers;
229 # Initialize & Search Zebra
230 for ( my $i = 0 ; $i < @servers ; $i++ ) {
232 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
235 ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
239 $zconns[$i]->errmsg() . " ("
240 . $zconns[$i]->errcode() . ") "
241 . $zconns[$i]->addinfo() . " "
242 . $zconns[$i]->diagset();
244 return ( $error, undef ) if $zconns[$i]->errcode();
248 # caught a ZOOM::Exception
252 . $@->addinfo() . " "
255 return ( $error, undef );
260 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
261 $ev = $zconns[ $i - 1 ]->last_event();
262 if ( $ev == ZOOM::Event::ZEND ) {
263 $hits = $tmpresults[ $i - 1 ]->size();
266 for ( my $j = 0 ; $j < $hits ; $j++ ) {
267 my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
268 push @results, $record;
273 return ( undef, \@results );
279 ( undef, $results_hashref, \@facets_loop ) = getRecords (
281 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
282 $results_per_page, $offset, $expanded_facet, $branches,
286 The all singing, all dancing, multi-server, asynchronous, scanning,
287 searching, record nabbing, facet-building
289 See verbose embedded documentation.
295 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
296 $results_per_page, $offset, $expanded_facet, $branches,
300 my @servers = @$servers_ref;
301 my @sort_by = @$sort_by_ref;
303 # Initialize variables for the ZOOM connection and results object
307 my $results_hashref = ();
309 # Initialize variables for the faceted results objects
310 my $facets_counter = ();
311 my $facets_info = ();
312 my $facets = getFacets();
315 ; # stores the ref to array of hashes for template facets loop
317 ### LOOP THROUGH THE SERVERS
318 for ( my $i = 0 ; $i < @servers ; $i++ ) {
319 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
321 # perform the search, create the results objects
322 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
324 if ( $servers[$i] =~ /biblioserver/ ) {
325 $query_to_use = $koha_query;
328 $query_to_use = $simple_query;
331 #$query_to_use = $simple_query if $scan;
332 warn $simple_query if ( $scan and $DEBUG );
334 # Check if we've got a query_type defined, if so, use it
338 if ( $query_type =~ /^ccl/ ) {
340 s/\:/\=/g; # change : to = last minute (FIXME)
343 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
346 elsif ( $query_type =~ /^cql/ ) {
349 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
351 elsif ( $query_type =~ /^pqf/ ) {
354 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
361 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
367 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
373 warn "WARNING: query problem with $query_to_use " . $@;
376 # Concatenate the sort_by limits and pass them to the results object
377 # Note: sort will override rank
379 foreach my $sort (@sort_by) {
380 if ( $sort eq "author_az" ) {
381 $sort_by .= "1=1003 <i ";
383 elsif ( $sort eq "author_za" ) {
384 $sort_by .= "1=1003 >i ";
386 elsif ( $sort eq "popularity_asc" ) {
387 $sort_by .= "1=9003 <i ";
389 elsif ( $sort eq "popularity_dsc" ) {
390 $sort_by .= "1=9003 >i ";
392 elsif ( $sort eq "call_number_asc" ) {
393 $sort_by .= "1=20 <i ";
395 elsif ( $sort eq "call_number_dsc" ) {
396 $sort_by .= "1=20 >i ";
398 elsif ( $sort eq "pubdate_asc" ) {
399 $sort_by .= "1=31 <i ";
401 elsif ( $sort eq "pubdate_dsc" ) {
402 $sort_by .= "1=31 >i ";
404 elsif ( $sort eq "acqdate_asc" ) {
405 $sort_by .= "1=32 <i ";
407 elsif ( $sort eq "acqdate_dsc" ) {
408 $sort_by .= "1=32 >i ";
410 elsif ( $sort eq "title_az" ) {
411 $sort_by .= "1=4 <i ";
413 elsif ( $sort eq "title_za" ) {
414 $sort_by .= "1=4 >i ";
418 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
419 warn "WARNING sort $sort_by failed";
422 } # finished looping through servers
424 # The big moment: asynchronously retrieve results from all servers
425 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
426 my $ev = $zconns[ $i - 1 ]->last_event();
427 if ( $ev == ZOOM::Event::ZEND ) {
428 next unless $results[ $i - 1 ];
429 my $size = $results[ $i - 1 ]->size();
433 # loop through the results
434 $results_hash->{'hits'} = $size;
436 if ( $offset + $results_per_page <= $size ) {
437 $times = $offset + $results_per_page;
442 for ( my $j = $offset ; $j < $times ; $j++ ) {
447 ## Check if it's an index scan
449 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
451 # here we create a minimal MARC record and hand it off to the
452 # template just like a normal result ... perhaps not ideal, but
454 my $tmprecord = MARC::Record->new();
455 $tmprecord->encoding('UTF-8');
459 # the minimal record in author/title (depending on MARC flavour)
460 if ( C4::Context->preference("marcflavour") eq
463 $tmptitle = MARC::Field->new(
471 MARC::Field->new( '245', ' ', ' ', a => $term, );
473 MARC::Field->new( '100', ' ', ' ', a => $occ, );
475 $tmprecord->append_fields($tmptitle);
476 $tmprecord->append_fields($tmpauthor);
477 $results_hash->{'RECORDS'}[$j] =
478 $tmprecord->as_usmarc();
483 $record = $results[ $i - 1 ]->record($j)->raw();
485 # warn "RECORD $j:".$record;
486 $results_hash->{'RECORDS'}[$j] = $record;
488 # Fill the facets while we're looping, but only for the biblioserver
489 $facet_record = MARC::Record->new_from_usmarc($record)
490 if $servers[ $i - 1 ] =~ /biblioserver/;
492 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
494 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
496 if ( $facets->[$k] ) {
498 for my $tag ( @{ $facets->[$k]->{'tags'} } )
501 $facet_record->field($tag);
503 for my $field (@fields) {
504 my @subfields = $field->subfields();
505 for my $subfield (@subfields) {
506 my ( $code, $data ) = @$subfield;
508 $facets->[$k]->{'subfield'} )
510 $facets_counter->{ $facets->[$k]
516 $facets_info->{ $facets->[$k]
517 ->{'link_value'} }->{'label_value'} =
518 $facets->[$k]->{'label_value'};
519 $facets_info->{ $facets->[$k]
520 ->{'link_value'} }->{'expanded'} =
521 $facets->[$k]->{'expanded'};
527 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
530 # warn "connection ", $i-1, ": $size hits";
531 # warn $results[$i-1]->record(0)->render() if $size > 0;
534 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
536 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
537 keys %$facets_counter )
540 my $number_of_facets;
541 my @this_facets_array;
544 $facets_counter->{$link_value}
545 ->{$b} <=> $facets_counter->{$link_value}->{$a}
546 } keys %{ $facets_counter->{$link_value} }
550 if ( ( $number_of_facets < 6 )
551 || ( $expanded_facet eq $link_value )
552 || ( $facets_info->{$link_value}->{'expanded'} ) )
555 # Sanitize the link value ), ( will cause errors with CCL,
556 my $facet_link_value = $one_facet;
557 $facet_link_value =~ s/(\(|\))/ /g;
559 # fix the length that will display in the label,
560 my $facet_label_value = $one_facet;
562 substr( $one_facet, 0, 20 ) . "..."
563 unless length($facet_label_value) <= 20;
565 # if it's a branch, label by the name, not the code,
566 if ( $link_value =~ /branch/ ) {
568 $branches->{$one_facet}->{'branchname'};
571 # but we're down with the whole label being in the link's title.
572 my $facet_title_value = $one_facet;
574 push @this_facets_array,
578 $facets_counter->{$link_value}
580 facet_label_value => $facet_label_value,
581 facet_title_value => $facet_title_value,
582 facet_link_value => $facet_link_value,
583 type_link_value => $link_value,
589 # handle expanded option
590 unless ( $facets_info->{$link_value}->{'expanded'} ) {
592 if ( ( $number_of_facets > 6 )
593 && ( $expanded_facet ne $link_value ) );
598 type_link_value => $link_value,
599 type_id => $link_value . "_id",
601 $facets_info->{$link_value}->{'label_value'},
602 facets => \@this_facets_array,
603 expandable => $expandable,
604 expand => $link_value,
611 return ( undef, $results_hashref, \@facets_loop );
614 use C4::Search::PazPar2;
619 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
620 $results_per_page, $offset, $expanded_facet, $branches,
624 my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
626 $paz->search($simple_query);
630 my $results_hashref = {};
631 my $stats = XMLin($paz->stat);
632 $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
633 my $results = XMLin($paz->show($offset, $results_per_page), forcearray => 1);
634 #die Dumper($results);
635 HIT: foreach my $hit (@{ $results->{'hit'} }) {
637 my $recid = $hit->{recid}->[0];
638 #if ($recid =~ /[\200-\377]/) {
639 if ($recid =~ /sodot/) {
641 #probably do not want non-ASCII in record ID
645 if (exists $hit->{count}) {
646 $count = $hit->{count}->[0];
649 for (my $i = 0; $i < $count; $i++) {
650 warn "look for $recid offset = $i";
651 my $rec = $paz->record($recid, $i);
652 warn "got record $i";
653 push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $paz->record($recid, $i);
658 # pass through facets
659 my $termlist_xml = $paz->termlist('author,subject');
660 my $terms = XMLin($termlist_xml, forcearray => 1);
661 my @facets_loop = ();
662 foreach my $list (sort keys %{ $terms->{'list'} }) {
664 foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
666 facet_label_value => $facet->{'name'}->[0],
669 push @facets_loop, ( {
675 return ( undef, $results_hashref, \@facets_loop );
# _remove_stopwords( $operand, $index )
#
# Strips stopwords out of a search operand.
#
#   $operand - the search string to clean up
#   $index   - the index qualifier for the operand; when it matches
#              /phr|ext/ the operand is left untouched
#
# Returns a two-element list: the (possibly modified) operand and a reference
# to the array of stopwords that were removed.
#
# The stopword list is the key set of the hash returned by
# C4::Context->stopwords. Each removed stopword is replaced, together with the
# non-alphabetic characters that delimit it, by a single space.
#
# NOTE(review): the operator check below is unanchored and case-sensitive, so
# it skips every stopword that merely *contains* "and", "or" or "not", not
# only the three operators themselves -- confirm this is intended.
# NOTE(review): each stopword is interpolated into the patterns unescaped; a
# stopword containing regex metacharacters would change the pattern. \Q...\E
# would avoid that -- confirm the stopword table can never hold such values.
679 sub _remove_stopwords {
680 my ( $operand, $index ) = @_;
681 my @stopwords_removed;
683 # phrase and exact-qualified indexes shouldn't have stopwords removed
684 if ( $index !~ m/phr|ext/ ) {
686 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
687 # we use IsAlpha unicode definition, to deal correctly with diacritics.
688 # otherwise, a French word like "leçon" would be split into "le" "çon", "le"
689 # is a stopword, we'd get "çon" and wouldn't find anything...
690 foreach ( keys %{ C4::Context->stopwords } ) {
691 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
# Three positions are handled: stopword between two non-alphabetic characters,
# stopword at the very start of the operand, stopword at the very end.
693 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
695 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
696 $operand =~ s/^$_\P{IsAlpha}/ /gi;
697 $operand =~ s/\P{IsAlpha}$_$/ /gi;
698 push @stopwords_removed, $_;
702 return ( $operand, \@stopwords_removed );
# _detect_truncation( $operand, $index )
#
# Splits an operand into words and sorts each word into a bucket according to
# where it carries a "*" truncation marker:
#
#   *word*   -> @rightlefttruncated
#   *word    -> @lefttruncated
#   word*    -> @righttruncated
#   word     -> @nontruncated   (no "*" anywhere in the word)
#   other    -> @regexpr        (a "*" in any other position, e.g. "wo*rd";
#                                presumably the final else branch -- confirm)
#
# For the three truncated buckets the substitution itself removes the leading
# and/or trailing "*", so the stored word no longer contains the marker.
#
# The five array references listed at the end of the sub are, in order:
# non-truncated, right-truncated, left-truncated, right-and-left-truncated,
# and regexpr words.
#
# NOTE(review): the operand is split on a single whitespace character (/\s/),
# so consecutive whitespace yields empty words; an empty word contains no "*"
# and therefore lands in @nontruncated.
# NOTE(review): $index is accepted but not referenced in the lines shown here.
706 sub _detect_truncation {
707 my ( $operand, $index ) = @_;
708 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
711 my @wordlist = split( /\s/, $operand );
712 foreach my $word (@wordlist) {
713 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
714 push @rightlefttruncated, $word;
716 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
717 push @lefttruncated, $word;
719 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
720 push @righttruncated, $word;
722 elsif ( index( $word, "*" ) < 0 ) {
723 push @nontruncated, $word;
726 push @regexpr, $word;
730 \@nontruncated, \@righttruncated, \@lefttruncated,
731 \@rightlefttruncated, \@regexpr
# _build_stemmed_operand
#
# Produces a stemmed version of a search operand using Lingua::Stem with the
# 'EN-US' locale.
#
# The operand is split on single spaces, every word is stemmed, and the stems
# are joined back together with each stem followed by one space. A "?" is
# appended to a stem unless the stem ends in "and", "or" or "not", or is
# shorter than three characters.
#
# Returns the resulting string (it keeps the trailing space added after the
# last stem). The string is also written to the warn log when $DEBUG is set.
#
# NOTE(review): assumes $operand holds the incoming search string and
# $stemmed_operand starts out empty -- confirm against the declarations.
# NOTE(review): "?" is presumably a truncation/wildcard marker understood by
# the search backend -- confirm.
# NOTE(review): the operator test only anchors the END of the stem, so ordinary
# stems such as "hand" or "cannot" are treated like operators and get no "?".
736 sub _build_stemmed_operand {
740 # FIXME: the locale should be set based on the user's language and/or search choice
741 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
743 # FIXME: these should be stored in the db so the librarian can modify the behavior
744 $stemmer->add_exceptions(
751 my @words = split( / /, $operand );
752 my $stems = $stemmer->stem(@words);
753 for my $stem (@$stems) {
754 $stemmed_operand .= "$stem";
755 $stemmed_operand .= "?"
756 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
757 $stemmed_operand .= " ";
759 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
760 return $stemmed_operand;
# _build_weighted_query( $operand, $stemmed_operand, $index )
#
# Wraps an operand in a rank ("rk=") specification so that matches in
# different index/structure combinations can be weighted differently.
#
#   $operand         - the search string; it is embedded between escaped
#                      double quotes in every clause
#   $stemmed_operand - stemmed form of the operand; only used for the keyword
#                      case, and only when the QueryStemming preference is on
#                      and the value is non-empty
#   $index           - the index qualifier that selects which clauses are built
#
# Returns the query string, which always starts with "(rk=(" and ends
# with "))".
#
# Clauses by $index:
#   'kw' or empty  - Title-cover exact, title exact, title phrase, optional
#                    fuzzy word list (QueryFuzzy), optional stemmed
#                    right-truncation (QueryStemming), plain word list
#   'bc'           - barcode: a single bc="..." clause
#   'an'           - authority number: a single an="..." clause
#   contains ','   - index already carries qualifiers: a single
#                    $index="..." clause
#   anything else  - $index exact, $index phrase, $index word list
#
# NOTE(review): the rN suffixes (r1, r2, r3, r8, r9) look like relevance-rank
# weights for the search backend -- confirm their exact meaning and ordering.
# NOTE(review): $weight_fields is read from the QueryWeightFields preference
# but is not referenced again in the lines shown here.
# NOTE(review): $operand is interpolated between quotes without escaping, so
# an operand that itself contains a double quote would break the clause.
# NOTE(review): the ',' test uses a plain string as the pattern
# ($index =~ ','); it behaves as the regex /,/.
764 sub _build_weighted_query {
766 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
767 # pretty well but could work much better if we had a smarter query parser
768 my ( $operand, $stemmed_operand, $index ) = @_;
769 my $stemming = C4::Context->preference("QueryStemming") || 0;
770 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
771 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
# NOTE(review): ".=" on a variable declared in the same statement acts as a
# plain assignment here; "=" would state the intent more clearly.
773 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
775 # Keyword, or, no index specified
776 if ( ( $index eq 'kw' ) || ( !$index ) ) {
778 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
779 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
780 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
781 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
782 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
783 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
784 if $fuzzy_enabled; # add fuzzy, word list
785 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
786 if ( $stemming and $stemmed_operand )
787 ; # add stemming, right truncation
788 $weighted_query .= " or wrdl,r9=\"$operand\"";
790 # embedded sorting: 0 a-z; 1 z-a
791 # $weighted_query .= ") or (sort1,aut=1";
794 # Barcode searches should skip this process
795 elsif ( $index eq 'bc' ) {
796 $weighted_query .= "bc=\"$operand\"";
799 # Authority-number searches should skip this process
800 elsif ( $index eq 'an' ) {
801 $weighted_query .= "an=\"$operand\"";
804 # If the index already has more than one qualifier, wrap the operand
805 # in quotes and pass it back (assumption is that the user knows what they
806 # are doing and won't appreciate us mucking up their query
807 elsif ( $index =~ ',' ) {
808 $weighted_query .= " $index=\"$operand\"";
811 #TODO: build better cases based on specific search indexes
813 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
814 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
815 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
817 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
820 $weighted_query .= "))"; # close rank specification
821 return $weighted_query;
827 $simple_query, $query_cgi,
829 $limit_cgi, $limit_desc,
830 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
832 Build queries and limits in CCL, CGI, Human,
833 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
835 See verbose embedded documentation.
841 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
843 warn "---------" if $DEBUG;
844 warn "Enter buildQuery" if $DEBUG;
845 warn "---------" if $DEBUG;
848 my @operators = @$operators if $operators;
849 my @indexes = @$indexes if $indexes;
850 my @operands = @$operands if $operands;
851 my @limits = @$limits if $limits;
852 my @sort_by = @$sort_by if $sort_by;
854 my $stemming = C4::Context->preference("QueryStemming") || 0;
855 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
856 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
857 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
858 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
860 # no stemming/weight/fuzzy in NoZebra
861 if ( C4::Context->preference("NoZebra") ) {
867 my $query = $operands[0];
868 my $simple_query = $operands[0];
870 # initialize the variables we're passing back
879 my $stopwords_removed; # flag to determine if stopwords have been removed
881 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
883 if ( $query =~ /^ccl=/ ) {
884 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
886 if ( $query =~ /^cql=/ ) {
887 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
889 if ( $query =~ /^pqf=/ ) {
890 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
893 # pass nested queries directly
894 # FIXME: need better handling of some of these variables in this case
895 if ( $query =~ /(\(|\))/ ) {
897 undef, $query, $simple_query, $query_cgi,
898 $query, $limit, $limit_cgi, $limit_desc,
899 $stopwords_removed, 'ccl'
903 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
904 # query operands and indexes and add stemming, truncation, field weighting, etc.
905 # Once we do so, we'll end up with a value in $query, just like if we had an
906 # incoming $query from the user
909 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
911 ; # a flag used to keep track if there was a previous query
912 # if there was, we can apply the current operator
914 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
916 # COMBINE OPERANDS, INDEXES AND OPERATORS
917 if ( $operands[$i] ) {
919 # A flag to determine whether or not to add the index to the query
922 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
923 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
926 $remove_stopwords = 0;
928 my $operand = $operands[$i];
929 my $index = $indexes[$i];
931 # Add index-specific attributes
932 # Date of Publication
933 if ( $index eq 'yr' ) {
934 $index .= ",st-numeric";
937 $stemming, $auto_truncation,
938 $weight_fields, $fuzzy_enabled,
940 ) = ( 0, 0, 0, 0, 0 );
943 # Date of Acquisition
944 elsif ( $index eq 'acqdate' ) {
945 $index .= ",st-date-normalized";
948 $stemming, $auto_truncation,
949 $weight_fields, $fuzzy_enabled,
951 ) = ( 0, 0, 0, 0, 0 );
954 # Set default structure attribute (word list)
956 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
957 $struct_attr = ",wrdl";
960 # Some helpful index variants
961 my $index_plus = $index . $struct_attr . ":" if $index;
962 my $index_plus_comma = $index . $struct_attr . "," if $index;
965 if ($remove_stopwords) {
966 ( $operand, $stopwords_removed ) =
967 _remove_stopwords( $operand, $index );
968 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
969 warn "REMOVED STOPWORDS: @$stopwords_removed"
970 if ( $stopwords_removed && $DEBUG );
974 my ( $nontruncated, $righttruncated, $lefttruncated,
975 $rightlefttruncated, $regexpr );
976 my $truncated_operand;
978 $nontruncated, $righttruncated, $lefttruncated,
979 $rightlefttruncated, $regexpr
980 ) = _detect_truncation( $operand, $index );
982 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
987 scalar(@$righttruncated) + scalar(@$lefttruncated) +
988 scalar(@$rightlefttruncated) > 0 )
991 # Don't field weight or add the index to the query, we do it here
993 undef $weight_fields;
994 my $previous_truncation_operand;
995 if ( scalar(@$nontruncated) > 0 ) {
996 $truncated_operand .= "$index_plus @$nontruncated ";
997 $previous_truncation_operand = 1;
999 if ( scalar(@$righttruncated) > 0 ) {
1000 $truncated_operand .= "and "
1001 if $previous_truncation_operand;
1002 $truncated_operand .=
1003 "$index_plus_comma" . "rtrn:@$righttruncated ";
1004 $previous_truncation_operand = 1;
1006 if ( scalar(@$lefttruncated) > 0 ) {
1007 $truncated_operand .= "and "
1008 if $previous_truncation_operand;
1009 $truncated_operand .=
1010 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1011 $previous_truncation_operand = 1;
1013 if ( scalar(@$rightlefttruncated) > 0 ) {
1014 $truncated_operand .= "and "
1015 if $previous_truncation_operand;
1016 $truncated_operand .=
1017 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1018 $previous_truncation_operand = 1;
1021 $operand = $truncated_operand if $truncated_operand;
1022 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1025 my $stemmed_operand;
1026 $stemmed_operand = _build_stemmed_operand($operand)
1028 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1030 # Handle Field Weighting
1031 my $weighted_operand;
1033 _build_weighted_query( $operand, $stemmed_operand, $index )
1035 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1036 $operand = $weighted_operand if $weight_fields;
1037 $indexes_set = 1 if $weight_fields;
1039 # If there's a previous operand, we need to add an operator
1040 if ($previous_operand) {
1042 # User-specified operator
1043 if ( $operators[ $i - 1 ] ) {
1044 $query .= " $operators[$i-1] ";
1045 $query .= " $index_plus " unless $indexes_set;
1046 $query .= " $operand";
1047 $query_cgi .= "&op=$operators[$i-1]";
1048 $query_cgi .= "&idx=$index" if $index;
1049 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1051 " $operators[$i-1] $index_plus $operands[$i]";
1054 # Default operator is and
1057 $query .= "$index_plus " unless $indexes_set;
1058 $query .= "$operand";
1059 $query_cgi .= "&op=and&idx=$index" if $index;
1060 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1061 $query_desc .= " and $index_plus $operands[$i]";
1065 # There isn't a previous operand, don't need an operator
1068 # Field-weighted queries already have indexes set
1069 $query .= " $index_plus " unless $indexes_set;
1071 $query_desc .= " $index_plus $operands[$i]";
1072 $query_cgi .= "&idx=$index" if $index;
1073 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1074 $previous_operand = 1;
1079 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1082 my $group_OR_limits;
1083 my $availability_limit;
1084 foreach my $this_limit (@limits) {
1085 if ( $this_limit =~ /available/ ) {
1087 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1089 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1090 $availability_limit .=
1091 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1092 $limit_cgi .= "&limit=available";
1096 # group_OR_limits, prefixed by mc-
1097 # OR every member of the group
1098 elsif ( $this_limit =~ /mc/ ) {
1099 $group_OR_limits .= " or " if $group_OR_limits;
1100 $limit_desc .= " or " if $group_OR_limits;
1101 $group_OR_limits .= "$this_limit";
1102 $limit_cgi .= "&limit=$this_limit";
1103 $limit_desc .= " $this_limit";
1106 # Regular old limits
1108 $limit .= " and " if $limit || $query;
1109 $limit .= "$this_limit";
1110 $limit_cgi .= "&limit=$this_limit";
1111 $limit_desc .= " $this_limit";
1114 if ($group_OR_limits) {
1115 $limit .= " and " if ( $query || $limit );
1116 $limit .= "($group_OR_limits)";
1118 if ($availability_limit) {
1119 $limit .= " and " if ( $query || $limit );
1120 $limit .= "($availability_limit)";
1123 # Normalize the query and limit strings
1126 for ( $query, $query_desc, $limit, $limit_desc ) {
1127 $_ =~ s/ / /g; # remove extra spaces
1128 $_ =~ s/^ //g; # remove any beginning spaces
1129 $_ =~ s/ $//g; # remove any ending spaces
1130 $_ =~ s/==/=/g; # remove double == from query
1132 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1134 for ($query_cgi,$simple_query) {
1137 # append the limit to the query
1138 $query .= " " . $limit;
1142 warn "QUERY:" . $query;
1143 warn "QUERY CGI:" . $query_cgi;
1144 warn "QUERY DESC:" . $query_desc;
1145 warn "LIMIT:" . $limit;
1146 warn "LIMIT CGI:" . $limit_cgi;
1147 warn "LIMIT DESC:" . $limit_desc;
1149 warn "Leave buildQuery";
1153 undef, $query, $simple_query, $query_cgi,
1154 $query_desc, $limit, $limit_cgi, $limit_desc,
1155 $stopwords_removed, $query_type
1159 =head2 searchResults
1161 Format results in a form suitable for passing to the template
1165 # IMO this subroutine is pretty messy still -- it's responsible for
1166 # building the HTML output for the template
1168 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1169 my $dbh = C4::Context->dbh;
1174 # add search-term highlighting via <span>s on the search terms
1175 my $span_terms_hashref;
1176 for my $span_term ( split( / /, $searchdesc ) ) {
1177 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1178 $span_terms_hashref->{$span_term}++;
1181 #Build branchnames hash
1183 #get branch information.....
1186 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1187 ; # FIXME : use C4::Koha::GetBranches
1189 while ( my $bdata = $bsth->fetchrow_hashref ) {
1190 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1195 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1198 while ( my $ldata = $lsch->fetchrow_hashref ) {
1199 $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1202 #Build itemtype hash
1203 #find itemtype & itemtype image
1207 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1210 while ( my $bdata = $bsth->fetchrow_hashref ) {
1211 $itemtypes{ $bdata->{'itemtype'} }->{description} =
1212 $bdata->{'description'};
1213 $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1214 $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'};
1215 $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1216 $bdata->{'notforloan'};
1219 #search item field code
1222 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1225 my ($itemtag) = $sth->fetchrow;
1227 # get notforloan authorised value list
1230 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1233 my ($notforloan_authorised_value) = $sth->fetchrow;
1235 ## find column names of items related to MARC
1236 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1238 my %subfieldstosearch;
1239 while ( ( my $column ) = $sth2->fetchrow ) {
1240 my ( $tagfield, $tagsubfield ) =
1241 &GetMarcFromKohaField( "items." . $column, "" );
1242 $subfieldstosearch{$column} = $tagsubfield;
1245 # handle which records to actually retrieve
1247 if ( $hits && $offset + $results_per_page <= $hits ) {
1248 $times = $offset + $results_per_page;
1254 # loop through all of the records we've retrieved
1255 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1257 $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1258 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1259 $oldbiblio->{result_number} = $i + 1;
1261 # add imageurl to itemtype if there is one
1262 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1263 $oldbiblio->{imageurl} =
1264 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1265 $oldbiblio->{description} =
1266 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1269 $oldbiblio->{imageurl} =
1270 getitemtypeimagesrc() . "/"
1271 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1272 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1273 $oldbiblio->{description} =
1274 $itemtypes{ $oldbiblio->{itemtype} }->{description};
1277 # Build summary if there is one (the summary is defined in the itemtypes table)
1278 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1279 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1280 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1281 my @fields = $marcrecord->fields();
1282 foreach my $field (@fields) {
1283 my $tag = $field->tag();
1284 my $tagvalue = $field->as_string();
1286 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1287 unless ( $tag < 10 ) {
1288 my @subf = $field->subfields;
1289 for my $i ( 0 .. $#subf ) {
1290 my $subfieldcode = $subf[$i][0];
1291 my $subfieldvalue = $subf[$i][1];
1292 my $tagsubf = $tag . $subfieldcode;
1294 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1299 $summary =~ s/\[(.*?)]//g;
1300 $summary =~ s/\n/<br>/g;
1301 $oldbiblio->{summary} = $summary;
1304 # Add search-term highlighting to the whole record where they match using <span>s
1305 if (C4::Context->preference("OpacHighlightedWords")){
1306 my $searchhighlightblob;
1307 for my $highlight_field ( $marcrecord->fields ) {
1309 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1310 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1311 for my $subfield ($highlight_field->subfields()) {
1313 next if $subfield->[0] eq '9';
1314 my $field = $subfield->[1];
1315 for my $term ( keys %$span_terms_hashref ) {
1316 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1317 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1321 $searchhighlightblob .= $field . " ... " if $match;
1325 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1326 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1328 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1329 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1331 # Add search-term highlighting to the title, subtitle, etc. fields
1332 for my $term ( keys %$span_terms_hashref ) {
1333 my $old_term = $term;
1334 if ( length($term) > 3 ) {
1335 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1336 $oldbiblio->{'title'} =~
1337 s/$term/<span class=\"term\">$&<\/span>/gi;
1338 $oldbiblio->{'subtitle'} =~
1339 s/$term/<span class=\"term\">$&<\/span>/gi;
1340 $oldbiblio->{'author'} =~
1341 s/$term/<span class=\"term\">$&<\/span>/gi;
1342 $oldbiblio->{'publishercode'} =~
1343 s/$term/<span class=\"term\">$&<\/span>/gi;
1344 $oldbiblio->{'place'} =~
1345 s/$term/<span class=\"term\">$&<\/span>/gi;
1346 $oldbiblio->{'pages'} =~
1347 s/$term/<span class=\"term\">$&<\/span>/gi;
1348 $oldbiblio->{'notes'} =~
1349 s/$term/<span class=\"term\">$&<\/span>/gi;
1350 $oldbiblio->{'size'} =~
1351 s/$term/<span class=\"term\">$&<\/span>/gi;
1356 # surely there's a better way to handle this
1358 $toggle = "#ffffcc";
1363 $oldbiblio->{'toggle'} = $toggle;
1365 # Pull out the items fields
1366 my @fields = $marcrecord->field($itemtag);
1368 # Setting item statuses for display
1369 my @available_items_loop;
1370 my @onloan_items_loop;
1371 my @other_items_loop;
1373 my $available_items;
1377 my $ordered_count = 0;
1378 my $available_count = 0;
1379 my $onloan_count = 0;
1380 my $longoverdue_count = 0;
1381 my $other_count = 0;
1382 my $wthdrawn_count = 0;
1383 my $itemlost_count = 0;
1384 my $itembinding_count = 0;
1385 my $itemdamaged_count = 0;
1386 my $can_place_holds = 0;
1387 my $items_count = scalar(@fields);
1390 ( C4::Context->preference('maxItemsinSearchResults') )
1391 ? C4::Context->preference('maxItemsinSearchResults') - 1
1394 # loop through every item
1395 foreach my $field (@fields) {
1399 # populate the items hash
1400 foreach my $code ( keys %subfieldstosearch ) {
1401 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1404 # set item's branch name, use homebranch first, fall back to holdingbranch
1405 if ( $item->{'homebranch'} ) {
1406 $item->{'branchname'} = $branches{ $item->{homebranch} };
1410 elsif ( $item->{'holdingbranch'} ) {
1411 $item->{'branchname'} = $branches{ $item->{holdingbranch} };
1414 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1415 if ( $item->{onloan} ) {
1417 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{due_date} = format_date( $item->{onloan} );
1418 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{count}++ if $item->{'homebranch'};
1419 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{branchname} = $item->{'branchname'};
1420 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{location} = $locations{ $item->{location} };
1421 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{itemcallnumber} = $item->{itemcallnumber};
1422 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1423 # if something's checked out and lost, mark it as 'long overdue'
1424 if ( $item->{itemlost} ) {
1425 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1426 $longoverdue_count++;
1429 # can place holds as long as this item isn't lost
1431 $can_place_holds = 1;
1435 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1439 if ( $item->{notforloan} == -1 ) {
1443 # item is withdrawn, lost or damaged
1444 if ( $item->{wthdrawn}
1445 || $item->{itemlost}
1447 || $item->{notforloan} )
1449 $wthdrawn_count++ if $item->{wthdrawn};
1450 $itemlost_count++ if $item->{itemlost};
1451 $itemdamaged_count++ if $item->{damaged};
1452 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1455 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1456 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1457 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1458 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1459 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1460 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1461 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1462 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1463 $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1468 $can_place_holds = 1;
1470 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1471 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1472 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1473 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1474 $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1477 } # notforloan, item level and biblioitem level
1478 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1480 ( C4::Context->preference('maxItemsinSearchResults') )
1481 ? C4::Context->preference('maxItemsinSearchResults') - 1
1483 for my $key ( sort keys %$onloan_items ) {
1484 $onloanitemscount++;
1485 push @onloan_items_loop, $onloan_items->{$key}
1486 unless $onloanitemscount > $maxitems;
1488 for my $key ( sort keys %$other_items ) {
1490 push @other_items_loop, $other_items->{$key}
1491 unless $otheritemscount > $maxitems;
1493 for my $key ( sort keys %$available_items ) {
1494 $availableitemscount++;
1495 push @available_items_loop, $available_items->{$key}
1496 unless $availableitemscount > $maxitems;
1499 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1500 $can_place_holds = 0
1501 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1502 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1503 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1504 $oldbiblio->{items_count} = $items_count;
1505 $oldbiblio->{available_items_loop} = \@available_items_loop;
1506 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1507 $oldbiblio->{other_items_loop} = \@other_items_loop;
1508 $oldbiblio->{availablecount} = $available_count;
1509 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1510 $oldbiblio->{onloancount} = $onloan_count;
1511 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1512 $oldbiblio->{othercount} = $other_count;
1513 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1514 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1515 $oldbiblio->{itemlostcount} = $itemlost_count;
1516 $oldbiblio->{damagedcount} = $itemdamaged_count;
1517 $oldbiblio->{orderedcount} = $ordered_count;
1518 $oldbiblio->{isbn} =~
1519 s/-//g; # deleting - in isbn to enable amazon content
1520 push( @newresults, $oldbiblio );
1525 #----------------------------------------------------------------------
1527 # Non-Zebra GetRecords#
1528 #----------------------------------------------------------------------
1532 NZgetRecords has the same API as the Zebra getRecords, although some parameters are ignored
# NZgetRecords: NoZebra counterpart of getRecords. It evaluates the query with
# NZanalyse and hands the resulting list to NZorder for sorting and paging.
# Positional parameters unpacked below: $query, $simple_query, $sort_by_ref,
# $servers_ref, $results_per_page, $offset, $expanded_facet, $branches, ...
# Of these, only $query, $sort_by_ref, $results_per_page and $offset are used
# in the statements that follow.
1538 $query, $simple_query, $sort_by_ref, $servers_ref,
1539 $results_per_page, $offset, $expanded_facet, $branches,
1542 warn "query =$query" if $DEBUG;
# NZanalyse is called without a server argument, so it falls back to its own
# default ('biblioserver').
1543 my $result = NZanalyse($query);
1544 warn "results =$result" if $DEBUG;
# @$sort_by_ref[0] is a one-element slice: only the first sort criterion is
# forwarded to NZorder.
1546 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1552 NZanalyse : takes a CQL string as parameter and returns a string of entries of the form biblionumber,title-weight;biblionumber,title-weight;...
1553 the list is built from an inverted index in the nozebra SQL table
1554 note that title is here only for convenience : the sorting will be very fast when requested on title
1555 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
# NZanalyse( $string, $server )
# Recursively evaluates a query string against the nozebra inverted index.
#   $string : the query; may contain and/or/not operators, parentheses and
#             one double-quoted phrase.
#   $server : which index set to search; defaults to 'biblioserver'.
# Inner nodes (X and|or|not Y) recurse on both sides and combine the two
# result strings; leaves run SQL against the nozebra table and concatenate
# the biblionumbers column of the matching rows.
1560 my ( $string, $server ) = @_;
1561 # warn "---------" if $DEBUG;
1562 warn " NZanalyse" if $DEBUG;
1563 # warn "---------" if $DEBUG;
1565 # $server contains biblioserver or authorities, depending on what we search on.
1566 #warn "querying : $string on $server";
1567 $server = 'biblioserver' unless $server;
1569 # if we have a ", replace the content to discard temporarily any and/or/not inside
1571 if ( $string =~ /"/ ) {
# NOTE(review): no /g modifier, so only the FIRST quoted phrase is replaced by
# the __X__ placeholder; a second quoted phrase would stay in the string.
1572 $string =~ s/"(.*?)"/__X__/;
1574 warn "commacontent : $commacontent" if $DEBUG;
1577 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1578 # then, call again NZanalyse with $left and $right
1579 # (recursive until we find a leaf (=> something without and/or/not)
1580 # delete repeated operator... Would then go in infinite loop
# Collapses an immediately doubled operator (e.g. " and and") into one; the
# substitution is repeated until nothing changes.
1581 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1584 #process parenthesis before.
# Case 1: the query starts with a parenthesised group, optionally followed by
# an operator and a right-hand part. (.*) is greedy, so it extends to the LAST
# closing parenthesis in the string.
1585 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1588 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1590 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1592 my $leftresult = NZanalyse( $left, $server );
1594 my $rightresult = NZanalyse( $right, $server );
1596 # OK, we have the results for right and left part of the query
1597 # depending of operand, intersect, union or exclude both lists
1598 # to get a result list
# The operator keeps its surrounding spaces (it was captured with them), hence
# the comparisons against ' and ', ' or ' and ' not '.
1599 if ( $operator eq ' and ' ) {
1600 return NZoperatorAND($leftresult,$rightresult);
1602 elsif ( $operator eq ' or ' ) {
1604 # just merge the 2 strings
1605 return $leftresult . $rightresult;
1607 elsif ( $operator eq ' not ' ) {
1608 return NZoperatorNOT($leftresult,$rightresult);
1612 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1616 warn "string :" . $string if $DEBUG;
# Case 2: no leading parenthesis. The non-greedy (.*?) splits on the FIRST
# operator found, so "a and b or c" is evaluated as a AND (b OR c).
1617 $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1620 my $operator = lc($2); # FIXME: and/or/not are operators, not operands
1621 warn "no parenthesis. left : $left operator: $operator right: $right"
1624 # it's not a leaf, we have a and/or/not
1627 # reintroduce comma content if needed
1628 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1629 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1630 warn "node : $left / $operator / $right\n" if $DEBUG;
1631 my $leftresult = NZanalyse( $left, $server );
1632 my $rightresult = NZanalyse( $right, $server );
1633 warn " leftresult : $leftresult" if $DEBUG;
1634 warn " rightresult : $rightresult" if $DEBUG;
1635 # OK, we have the results for right and left part of the query
1636 # depending of operand, intersect, union or exclude both lists
1637 # to get a result list
1638 if ( $operator eq ' and ' ) {
1640 return NZoperatorAND($leftresult,$rightresult);
1642 elsif ( $operator eq ' or ' ) {
1644 # just merge the 2 strings
1645 return $leftresult . $rightresult;
1647 elsif ( $operator eq ' not ' ) {
1648 return NZoperatorNOT($leftresult,$rightresult);
1652 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1653 die "error : operand unknown : $operator for $string";
1656 # it's a leaf, do the real SQL query and return the result
1659 $string =~ s/__X__/"$commacontent"/ if $commacontent;
# Every listed punctuation character (including the double quotes that were
# just re-inserted) is replaced by a space before the leaf is parsed.
1660 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1661 warn "leaf:$string" if $DEBUG;
1663 # parse the string in in operator/operand/value again
# Two-character comparison operators are looked for first ...
1664 $string =~ /(.*)(>=|<=)(.*)/;
1668 # warn "handling leaf... left:$left operator:$operator right:$right"
# ... and only if none was found is a single-character operator tried.
1670 unless ($operator) {
1671 $string =~ /(.*)(>|<|=)(.*)/;
1676 # "handling unless (operator)... left:$left operator:$operator right:$right"
1681 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
# Drops everything from the first comma or space onwards in the index name.
1682 $left =~ s/[, ].*$//;
1684 # automatic replace for short operators
# The right-hand strings are used as regex patterns by =~, so '^ti$' is an
# anchored exact match on the short index name.
1685 $left = 'title' if $left =~ '^ti$';
1686 $left = 'author' if $left =~ '^au$';
1687 $left = 'publisher' if $left =~ '^pb$';
1688 $left = 'subject' if $left =~ '^su$';
1689 $left = 'koha-Auth-Number' if $left =~ '^an$';
1690 $left = 'keyword' if $left =~ '^kw$';
# NOTE(review): this warn (and the ones a few lines below) is not guarded by
# "if $DEBUG", unlike the rest of the function, so it fires on every leaf.
1691 warn "handling leaf... left:$left operator:$operator right:$right";
1692 if ( $operator && $left ne 'keyword' ) {
1694 #do a specific search
1695 my $dbh = C4::Context->dbh;
# '=' becomes LIKE when the value contains a % wildcard.
1696 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
# $operator is interpolated into the SQL text; server, index name and value
# are bound through placeholders. Presumably $operator can only be one of
# >=, <=, >, <, = (from the patterns above) or LIKE -- TODO confirm.
1699 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1701 warn "$left / $operator / $right\n";
1703 # split each word, query the DB and build the biblionumbers result
1704 #sanitizing leftpart
# NOTE(review): without /g this substitution removes leading whitespace OR
# trailing whitespace (whichever alternative matches first), not both. The
# same applies to the per-word trim inside the loop.
1705 $left =~ s/^\s+|\s+$//;
1706 foreach ( split / /, $right ) {
1708 $_ =~ s/^\s+|\s+$//;
1710 warn "EXECUTE : $server, $left, $_";
1711 $sth->execute( $server, $left, $_ )
# NOTE(review): $! is the OS errno; a DBI failure is normally reported via
# the statement handle's errstr -- confirm which message is wanted here.
1712 or warn "execute failed: $!";
1713 while ( my ( $line, $value ) = $sth->fetchrow ) {
1715 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1716 # otherwise, fill the result
# The numeric test is made on the whole $right, not on the current word $_.
1717 $biblionumbers .= $line
1718 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1719 warn "result : $value "
1720 . ( $right =~ /\d/ ) . "=="
1721 . ( $value =~ /\D/?$line:"" ); #= $line";
1724 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1727 $results = NZoperatorAND($biblionumbers,$results);
1730 $results = $biblionumbers;
1736 #do a complete search (all indexes), if index='kw' do complete search too.
1737 my $dbh = C4::Context->dbh;
# Keyword search: no indexname restriction, every index of the server is
# searched with LIKE.
1740 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1743 # split each word, query the DB and build the biblionumbers result
1744 foreach ( split / /, $string ) {
1745 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1746 warn "search on all indexes on $_" if $DEBUG;
1749 $sth->execute( $server, $_ );
1750 while ( my $line = $sth->fetchrow ) {
1751 $biblionumbers .= $line;
1754 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1756 $results = NZoperatorAND($biblionumbers,$results);
1759 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1760 $results = $biblionumbers;
1764 warn "return : $results for LEAF : $string" if $DEBUG;
1767 warn "---------" if $DEBUG;
1768 warn "Leave NZanalyse" if $DEBUG;
1769 warn "---------" if $DEBUG;
# NZoperatorAND( $first_list, $second_list )
#
# Intersects two NoZebra result strings and returns the intersection as a
# string in the same format ('' when nothing is common).
#
# Each argument is a ';'-terminated list of entries "value-weight", where
# value identifies a record (biblionumber,title) and weight is the number of
# times the search term occurs in it. An entry is kept when its value is
# present in BOTH lists; its weight becomes the smaller of the two weights.
#
# The result is stored twice, to have the same weight for AND than OR.
# example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
# result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
#
# Output order follows $second_list. Undefined arguments are treated as
# empty lists.
sub NZoperatorAND {
    my ( $first_list, $second_list ) = @_;
    $first_list  = '' unless defined $first_list;
    $second_list = '' unless defined $second_list;

    my @second_entries = split /;/, $second_list;
    warn " @second_entries / $first_list \n" if $DEBUG;

    # Index the first list by value. The lookup used to be a pattern match
    # with the value interpolated as a regular expression, which aborted on
    # values containing regex metacharacters such as "(" and matched
    # "61,Foo" inside "161,Foo-1;". An exact hash lookup avoids both.
    # When a value occurs several times, the first weight wins, as before.
    my %weight_of;
    foreach my $entry ( split /;/, $first_list ) {
        next unless $entry =~ /^(.*)-(\d+)$/;
        $weight_of{$1} = $2 unless exists $weight_of{$1};
    }

    my $finalresult = '';
    foreach my $entry (@second_entries) {
        next unless length $entry;
        my ( $value, $countvalue ) = ( $entry, undef );
        ( $value, $countvalue ) = ( $1, $2 ) if $entry =~ /^(.*)-(\d+)$/;
        next unless exists $weight_of{$value};

        # keep the smaller of the two weights
        my $other_count = $weight_of{$value};
        $countvalue = $other_count
          if !defined $countvalue || $other_count < $countvalue;

        $finalresult .= "$value-$countvalue;$value-$countvalue;";
    }
    warn " $finalresult \n" if $DEBUG;
    return $finalresult;
}
# NZoperatorOR( $first_list, $second_list )
#
# Union of two NoZebra result strings: the two lists are simply joined, the
# first argument followed by the second. Duplicates are kept.
sub NZoperatorOR {
    my @operands = @_[ 0, 1 ];
    return join q{}, @operands;
}
# NZoperatorNOT( $leftresult, $rightresult )
#
# Returns the entries of $leftresult whose value does not occur in
# $rightresult ("left NOT right"), as a ';'-terminated string ('' when
# nothing remains).
#
# Both arguments are ';'-separated lists of entries "value-weight". Entries
# are compared on their value only (the trailing "-weight" is ignored), and
# surviving entries are returned unchanged, in their original order.
# Undefined arguments are treated as empty lists.
#
# Callers pass the left operand first: NZoperatorNOT($leftresult,$rightresult).
# The parameters used to be unpacked in the opposite order, so the function
# returned right-minus-left instead of left-minus-right.
sub NZoperatorNOT {
    my ( $leftresult, $rightresult ) = @_;
    $leftresult  = '' unless defined $leftresult;
    $rightresult = '' unless defined $rightresult;

    # Exact lookup table of the values to exclude. The previous test used the
    # value as an unescaped, unanchored pattern, so "61,Foo" was considered
    # present when the other list only held "161,Foo-1".
    my %excluded;
    foreach my $entry ( split /;/, $rightresult ) {
        next unless length $entry;
        ( my $value = $entry ) =~ s/-\d+$//;
        $excluded{$value} = 1;
    }

    my $finalresult = '';
    foreach my $entry ( split /;/, $leftresult ) {
        next unless length $entry;
        ( my $value = $entry ) =~ s/-\d+$//;
        next if $excluded{$value};
        $finalresult .= "$entry;";
    }
    return $finalresult;
}
1823 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
# NZorder( $biblionumbers, $ordering, $results_per_page, $offset )
# Sorts a NoZebra result string and returns a hashref of the form
#   { biblioserver => { RECORDS => [ ... ], hits => N } }
#   $biblionumbers    : ';'-separated entries, each split on ',' into
#                       biblionumber and title
#   $ordering         : sort key name; matched against popularity, author,
#                       callnumber, pubdate and title, otherwise ranking
#   $results_per_page : defaults to 20
#   $offset           : defaults to 0
# The popularity/author/callnumber/pubdate branches load every record with
# GetMarcBiblio before sorting; the title and ranking branches sort first and
# load only the records of the requested page.
1830 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1831 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1833 # order title asc by default
1834 # $ordering = '1=36 <i' unless $ordering;
1835 $results_per_page = 20 unless $results_per_page;
1836 $offset = 0 unless $offset;
1837 my $dbh = C4::Context->dbh;
1840 # order by POPULARITY
1842 if ( $ordering =~ /popularity/ ) {
1846 # popularity is not in the MARC record, it is built from a specific query
1848 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1849 foreach ( split /;/, $biblionumbers ) {
1850 my ( $biblionumber, $title ) = split /,/, $_;
1851 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1852 $sth->execute($biblionumber);
1853 my $popularity = $sth->fetchrow || 0;
1855 # hint : the key is popularity.title because we can have
1856 # many results with the same popularity. In this cas, sub-ordering is done by title
1857 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1858 # (un-frequent, I agree, but we won't forget anything that way ;-)
# "%10d" right-aligns the count in 10 columns so that the string sort used
# below orders the keys like a numeric sort.
1859 $popularity{ sprintf( "%10d", $popularity ) . $title
1860 . $biblionumber } = $biblionumber;
1863 # sort the hash and return the same structure as GetRecords (Zebra querying)
1866 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1867 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1868 $result_hash->{'RECORDS'}[ $numbers++ ] =
1869 $result{ $popularity{$key} }->as_usmarc();
1872 else { # sort popularity ASC
1873 foreach my $key ( sort ( keys %popularity ) ) {
1874 $result_hash->{'RECORDS'}[ $numbers++ ] =
1875 $result{ $popularity{$key} }->as_usmarc();
# "my $finalresult = ()" leaves the scalar undefined; the hash it points to
# is autovivified by the assignment two lines below.
1878 my $finalresult = ();
1879 $result_hash->{'hits'} = $numbers;
1880 $finalresult->{'biblioserver'} = $result_hash;
1881 return $finalresult;
# ORDER BY author
1887 elsif ( $ordering =~ /author/ ) {
1889 foreach ( split /;/, $biblionumbers ) {
1890 my ( $biblionumber, $title ) = split /,/, $_;
1891 my $record = GetMarcBiblio($biblionumber);
1893 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1894 $author = $record->subfield( '200', 'f' );
1895 $author = $record->subfield( '700', 'a' ) unless $author;
1898 $author = $record->subfield( '100', 'a' );
1901 # hint : the key is author.biblionumber because several biblios can share the same author
1902 # and we don't want to get only 1 result for each of them !!!
1903 $result{ $author . $biblionumber } = $record;
1906 # sort the hash and return the same structure as GetRecords (Zebra querying)
1909 if ( $ordering eq 'author_za' ) { # sort by author desc
1910 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1911 $result_hash->{'RECORDS'}[ $numbers++ ] =
1912 $result{$key}->as_usmarc();
1915 else { # sort by author ASC
1916 foreach my $key ( sort ( keys %result ) ) {
1917 $result_hash->{'RECORDS'}[ $numbers++ ] =
1918 $result{$key}->as_usmarc();
1921 my $finalresult = ();
1922 $result_hash->{'hits'} = $numbers;
1923 $finalresult->{'biblioserver'} = $result_hash;
1924 return $finalresult;
1927 # ORDER BY callnumber
1930 elsif ( $ordering =~ /callnumber/ ) {
1932 foreach ( split /;/, $biblionumbers ) {
1933 my ( $biblionumber, $title ) = split /,/, $_;
1934 my $record = GetMarcBiblio($biblionumber);
# NOTE(review): this call passes $dbh as first argument, whereas the other
# GetMarcFromKohaField calls in this file pass the kohafield name first --
# confirm the expected signature.
1936 my ( $callnumber_tag, $callnumber_subfield ) =
1937 GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1938 ( $callnumber_tag, $callnumber_subfield ) =
1939 GetMarcFromKohaField('biblioitems.callnumber')
1940 unless $callnumber_tag;
# NOTE(review): the sort key below is read from 200$f / 100$a -- the same
# subfields the author branch uses -- and the tag/subfield looked up just
# above are not used in these statements. Looks like a copy/paste leftover;
# confirm.
1941 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1942 $callnumber = $record->subfield( '200', 'f' );
1945 $callnumber = $record->subfield( '100', 'a' );
1948 # hint : the key is callnumber.biblionumber because several biblios can share the same value
1949 # and we don't want to get only 1 result for each of them !!!
1950 $result{ $callnumber . $biblionumber } = $record;
1953 # sort the hash and return the same structure as GetRecords (Zebra querying)
# NOTE(review): this branch is entered only when $ordering matches
# /callnumber/, and such a string can never equal 'call_number_dsc' (with an
# underscore), so the descending branch looks unreachable -- confirm the
# ordering names used by callers.
1956 if ( $ordering eq 'call_number_dsc' ) { # sort by call number desc
1957 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1958 $result_hash->{'RECORDS'}[ $numbers++ ] =
1959 $result{$key}->as_usmarc();
1962 else { # sort by call number ASC
1963 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1964 $result_hash->{'RECORDS'}[ $numbers++ ] =
1965 $result{$key}->as_usmarc();
1968 my $finalresult = ();
1969 $result_hash->{'hits'} = $numbers;
1970 $finalresult->{'biblioserver'} = $result_hash;
1971 return $finalresult;
1973 elsif ( $ordering =~ /pubdate/ ) { #pub year
1975 foreach ( split /;/, $biblionumbers ) {
1976 my ( $biblionumber, $title ) = split /,/, $_;
1977 my $record = GetMarcBiblio($biblionumber);
# The mapping lookup does not depend on the record, yet it is repeated for
# every entry of the loop.
1978 my ( $publicationyear_tag, $publicationyear_subfield ) =
1979 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1980 my $publicationyear =
1981 $record->subfield( $publicationyear_tag,
1982 $publicationyear_subfield );
1984 # hint : the key is publicationyear.biblionumber because several biblios can share the same year
1985 # and we don't want to get only 1 result for each of them !!!
1986 $result{ $publicationyear . $biblionumber } = $record;
1989 # sort the hash and return the same structure as GetRecords (Zebra querying)
1992 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1993 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1994 $result_hash->{'RECORDS'}[ $numbers++ ] =
1995 $result{$key}->as_usmarc();
1998 else { # sort by pub year ASC
1999 foreach my $key ( sort ( keys %result ) ) {
2000 $result_hash->{'RECORDS'}[ $numbers++ ] =
2001 $result{$key}->as_usmarc();
2004 my $finalresult = ();
2005 $result_hash->{'hits'} = $numbers;
2006 $finalresult->{'biblioserver'} = $result_hash;
2007 return $finalresult;
# ORDER BY title
2013 elsif ( $ordering =~ /title/ ) {
2015 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2017 foreach ( split /;/, $biblionumbers ) {
2018 my ( $biblionumber, $title ) = split /,/, $_;
2020 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2021 # and we don't want to get only 1 result for each of them !!!
2022 # hint & speed improvement : we can order without reading the record
2023 # so order, and read records only for the requested page !
2024 $result{ $title . $biblionumber } = $biblionumber;
2027 # sort the hash and return the same structure as GetRecords (Zebra querying)
2030 if ( $ordering eq 'title_az' ) { # sort by title ascending (A-Z)
2031 foreach my $key ( sort ( keys %result ) ) {
2032 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2035 else { # any other title ordering: sort by title descending (Z-A)
2036 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2037 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2041 # limit the $results_per_page to result size if it's more
# NOTE(review): the clamp uses $numbers - 1 and the loop bound below is
# inclusive (<=), so when the clamp does not apply the loop visits
# $results_per_page + 1 slots -- looks like an off-by-one; confirm.
2042 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2044 # for the requested page, replace biblionumber by the complete record
2045 # speed improvement : avoid reading too much things
2047 my $counter = $offset ;
2048 $counter <= $offset + $results_per_page ;
# NOTE(review): unlike the ranking branch further down, this replacement is
# not guarded by a check that the slot holds a biblionumber.
2052 $result_hash->{'RECORDS'}[$counter] =
2053 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2055 my $finalresult = ();
2056 $result_hash->{'hits'} = $numbers;
2057 $finalresult->{'biblioserver'} = $result_hash;
2058 return $finalresult;
# ORDER BY ranking (any $ordering not handled above)
2065 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2068 foreach ( split /;/, $biblionumbers ) {
2069 my ( $biblionumber, $title ) = split /,/, $_;
# NOTE(review): (\d) captures a single digit and the pattern is not anchored
# at the end, so a weight of 10 or more would presumably be read as its
# first digit -- confirm.
2070 $title =~ /(.*)-(\d)/;
2075 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2076 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2077 # biblio N has ranking = 6
2078 $count_ranking{$biblionumber} += $ranking;
2081 # build the result by "inverting" the count_ranking hash
2082 # hint : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2084 foreach ( keys %count_ranking ) {
2085 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2088 # sort the hash and return the same structure as GetRecords (Zebra querying)
# Highest ranking first.
2091 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2092 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2095 # limit the $results_per_page to result size if it's more
2096 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2098 # for the requested page, replace biblionumber by the complete record
2099 # speed improvement : avoid reading too much things
2101 my $counter = $offset ;
2102 $counter <= $offset + $results_per_page ;
2106 $result_hash->{'RECORDS'}[$counter] =
2107 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2108 if $result_hash->{'RECORDS'}[$counter];
2110 my $finalresult = ();
2111 $result_hash->{'hits'} = $numbers;
2112 $finalresult->{'biblioserver'} = $result_hash;
2113 return $finalresult;
2119 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2121 This function replaces every occurrence of $initvalue in subfield $tag$subfield of each record in $listbiblios.
2122 If the test parameter is set, the changes are not written to the database.
2128 * $listbiblios is an array ref to marcrecords to be changed
2129 * $tagsubfield is the reference of the subfield to change.
2130 * $initvalue is the value to search the record for
2131 * $targetvalue is the value to set the subfield to
2132 * $test is to be set only not to perform changes in database.
2134 =item C<Output arg:>
2135 * $countchanged counts all the changes performed.
2136 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2138 =item C<usage in the script:>
2142 my ($countchanged, $listunchanged) = ModBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);
2143 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2144 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
# ModBiblios( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test )
# Replaces $initvalue by $targetvalue in one tag/subfield of every record of
# @$listbiblios and returns ( $countmatched, \@unmatched ), where @unmatched
# collects the biblionumbers pushed in the branches below.
#   $listbiblios : arrayref of USMARC blobs (or, as a fallback, biblionumbers)
#   $tagsubfield : tag digits optionally followed by one subfield code
2149 my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
# NOTE(review): "my ... if COND" -- perlsyn documents the behaviour of a
# declaration with a statement modifier as undefined; both variables are
# simply left unset when $tagsubfield does not match.
2152 my ( $tag, $subfield ) = ( $1, $2 )
2153 if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
# NOTE(review): /0-9/ matches the literal three-character text "0-9", which a
# one-character subfield code can never contain; a character class [0-9]
# looks intended (a short tag followed by a digit) -- confirm.
2154 if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2155 $tag = $tag . $subfield;
2158 my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber');
2159 my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2160 if ($tag eq $itemtag) {
2161 # do not allow the embedded item tag to be
2163 warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2166 foreach my $usmarc (@$listbiblios) {
# Decoding is wrapped in eval so that an undecodable entry does not abort the
# whole loop.
2168 $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2172 # usmarc is not a valid usmarc May be a biblionumber
2173 # FIXME - sorry, please let's figure out whether
2174 # this function is to be passed a list of
2175 # record numbers or a list of MARC::Record
2176 # objects. The former is probably better
2177 # because the MARC records supplied by Zebra
2178 # may be not current.
2179 $record = GetMarcBiblio($usmarc);
2180 $biblionumber = $usmarc;
# NOTE(review): 010 is an octal literal (decimal 8), so this test is
# "$bntag >= 8", not ">= 10"; tags 008 and 009 therefore take the subfield
# path -- confirm the intended cut-off for control fields.
2183 if ( $bntag >= 010 ) {
2184 $biblionumber = $record->subfield( $bntag, $bnsubf );
2187 $biblionumber = $record->field($bntag)->data;
2191 #GetBiblionumber is to be written.
2192 #Could be replaced by TransformMarcToKoha (But Would be longer)
2193 if ( $record->field($tag) ) {
2195 foreach my $field ( $record->field($tag) ) {
# $initvalue is interpolated as a regular expression here and below, so any
# regex metacharacters it contains are interpreted, not matched literally.
2198 $field->delete_subfield(
2199 'code' => $subfield,
2200 'match' => qr($initvalue)
2206 $field->update( $subfield, $targetvalue )
# Same octal literal as above (010 == 8).
2211 if ( $tag >= 010 ) {
# NOTE(review): delete_field is called on the field object with itself as
# argument; presumably it was meant to be called on $record -- confirm
# against the MARC::Record / MARC::Field API.
2212 if ( $field->delete_field($field) ) {
# NOTE(review): this assigns to the result of a method call; unless data() is
# an lvalue sub this cannot work -- verify, an explicit setter call or
# update() is probably needed.
2218 $field->data = $targetvalue
2219 if ( $field->data =~ qr($initvalue) );
2224 # warn $record->as_formatted;
2226 ModBiblio( $record, $biblionumber,
2227 GetFrameworkCode($biblionumber) )
2231 push @unmatched, $biblionumber;
2235 push @unmatched, $biblionumber;
2238 return ( $countmatched, \@unmatched );
2241 END { } # module clean-up code here (global destructor)
2248 Koha Developement team <info@koha.org>