3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
21 use C4::Biblio; # GetMarcFromKohaField
22 use C4::Koha; # getFacets
24 use C4::Search::PazPar2;
26 use C4::Dates qw(format_date);
29 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
31 # set the version for version checking
34 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
39 C4::Search - Functions for searching the Koha catalog.
43 See opac/opac-search.pl or catalogue/search.pl for example of usage
47 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
67 =head2 findseealso($dbh,$fields);
69 C<$dbh> is a link to the DB handler.
72 my $dbh =C4::Context->dbh;
74 C<$fields> is a reference to the fields array
76 This function modifies the @$fields array and adds related fields to search on.
78 FIXME: this function is probably deprecated in Koha 3
83 my ( $dbh, $fields ) = @_;
84 my $tagslib = GetMarcStructure(1);
85 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
86 my ($tag) = substr( @$fields[$i], 1, 3 );
87 my ($subfield) = substr( @$fields[$i], 4, 1 );
88 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
89 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
103 my $dbh = C4::Context->dbh;
104 my $result = TransformMarcToKoha( $dbh, $record, '' );
109 my ( $biblionumber, $title );
111 # search duplicate on ISBN, easy and fast..
112 # ... normalize first
113 if ( $result->{isbn} ) {
114 $result->{isbn} =~ s/\(.*$//;
115 $result->{isbn} =~ s/\s+$//;
116 $query = "isbn=$result->{isbn}";
119 $result->{title} =~ s /\\//g;
120 $result->{title} =~ s /\"//g;
121 $result->{title} =~ s /\(//g;
122 $result->{title} =~ s /\)//g;
124 # FIXME: instead of removing operators, could just do
125 # quotes around the value
126 $result->{title} =~ s/(and|or|not)//g;
127 $query = "ti,ext=$result->{title}";
128 $query .= " and itemtype=$result->{itemtype}"
129 if ( $result->{itemtype} );
130 if ( $result->{author} ) {
131 $result->{author} =~ s /\\//g;
132 $result->{author} =~ s /\"//g;
133 $result->{author} =~ s /\(//g;
134 $result->{author} =~ s /\)//g;
136 # remove valid operators
137 $result->{author} =~ s/(and|or|not)//g;
138 $query .= " and au,ext=$result->{author}";
142 # FIXME: add error handling
143 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
145 foreach my $possible_duplicate_record (@$searchresults) {
147 MARC::Record->new_from_usmarc($possible_duplicate_record);
148 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
150 # FIXME :: why 2 $biblionumber ?
152 push @results, $result->{'biblionumber'};
153 push @results, $result->{'title'};
161 ($error, $results, $total_hits) = SimpleSearch( $query, $offset, $max_results, [ @servers ] );
163 This function provides a simple search API on the bibliographic catalog
169 * $query can be a simple keyword or a complete CCL query
170 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
171 * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
172 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
176 * $error is undef unless an error is detected
177 * $results is a reference to an array of records.
179 =item C<usage in the script:>
183 my ($error, $marcresults) = SimpleSearch($query);
185 if (defined $error) {
186 $template->param(query_error => $error);
187 warn "error: ".$error;
188 output_html_with_http_headers $input, $cookie, $template->output;
192 my $hits = scalar @$marcresults;
195 for(my $i=0;$i<$hits;$i++) {
197 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
198 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
200 #build the hash for the template.
201 $resultsloop{highlight} = ($i % 2)?(1):(0);
202 $resultsloop{title} = $biblio->{'title'};
203 $resultsloop{subtitle} = $biblio->{'subtitle'};
204 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
205 $resultsloop{author} = $biblio->{'author'};
206 $resultsloop{publishercode} = $biblio->{'publishercode'};
207 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
209 push @results, \%resultsloop;
212 $template->param(result=>\@results);
217 my ( $query, $offset, $max_results, $servers ) = @_;
219 if ( C4::Context->preference('NoZebra') ) {
220 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
223 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
224 return ( undef, $search_result, scalar($search_result) );
227 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
228 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
233 return ( "No query entered", undef, undef ) unless $query;
235 # Initialize & Search Zebra
236 for ( my $i = 0 ; $i < @servers ; $i++ ) {
238 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
241 ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
245 $zconns[$i]->errmsg() . " ("
246 . $zconns[$i]->errcode() . ") "
247 . $zconns[$i]->addinfo() . " "
248 . $zconns[$i]->diagset();
250 return ( $error, undef, undef ) if $zconns[$i]->errcode();
254 # caught a ZOOM::Exception
258 . $@->addinfo() . " "
261 return ( $error, undef, undef );
264 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
265 my $event = $zconns[ $i - 1 ]->last_event();
266 if ( $event == ZOOM::Event::ZEND ) {
268 my $first_record = defined( $offset ) ? $offset+1 : 1;
269 my $hits = $tmpresults[ $i - 1 ]->size();
270 $total_hits += $hits;
271 my $last_record = $hits;
272 if ( defined $max_results && $offset + $max_results < $hits ) {
273 $last_record = $offset + $max_results;
276 for my $j ( $first_record..$last_record ) {
277 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
278 push @results, $record;
283 return ( undef, \@results, $total_hits );
289 ( undef, $results_hashref, \@facets_loop ) = getRecords (
291 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
292 $results_per_page, $offset, $expanded_facet, $branches,
296 The all singing, all dancing, multi-server, asynchronous, scanning,
297 searching, record nabbing, facet-building
299 See verbose embedded documentation.
305 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
306 $results_per_page, $offset, $expanded_facet, $branches,
310 my @servers = @$servers_ref;
311 my @sort_by = @$sort_by_ref;
313 # Initialize variables for the ZOOM connection and results object
317 my $results_hashref = ();
319 # Initialize variables for the faceted results objects
320 my $facets_counter = ();
321 my $facets_info = ();
322 my $facets = getFacets();
325 ; # stores the ref to array of hashes for template facets loop
327 ### LOOP THROUGH THE SERVERS
328 for ( my $i = 0 ; $i < @servers ; $i++ ) {
329 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
331 # perform the search, create the results objects
332 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
333 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
335 #$query_to_use = $simple_query if $scan;
336 warn $simple_query if ( $scan and $DEBUG );
338 # Check if we've got a query_type defined, if so, use it
342 if ( $query_type =~ /^ccl/ ) {
344 s/\:/\=/g; # change : to = last minute (FIXME)
347 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
350 elsif ( $query_type =~ /^cql/ ) {
353 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
355 elsif ( $query_type =~ /^pqf/ ) {
358 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
365 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
371 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
377 warn "WARNING: query problem with $query_to_use " . $@;
380 # Concatenate the sort_by limits and pass them to the results object
381 # Note: sort will override rank
383 foreach my $sort (@sort_by) {
384 if ( $sort eq "author_az" ) {
385 $sort_by .= "1=1003 <i ";
387 elsif ( $sort eq "author_za" ) {
388 $sort_by .= "1=1003 >i ";
390 elsif ( $sort eq "popularity_asc" ) {
391 $sort_by .= "1=9003 <i ";
393 elsif ( $sort eq "popularity_dsc" ) {
394 $sort_by .= "1=9003 >i ";
396 elsif ( $sort eq "call_number_asc" ) {
397 $sort_by .= "1=20 <i ";
399 elsif ( $sort eq "call_number_dsc" ) {
400 $sort_by .= "1=20 >i ";
402 elsif ( $sort eq "pubdate_asc" ) {
403 $sort_by .= "1=31 <i ";
405 elsif ( $sort eq "pubdate_dsc" ) {
406 $sort_by .= "1=31 >i ";
408 elsif ( $sort eq "acqdate_asc" ) {
409 $sort_by .= "1=32 <i ";
411 elsif ( $sort eq "acqdate_dsc" ) {
412 $sort_by .= "1=32 >i ";
414 elsif ( $sort eq "title_az" ) {
415 $sort_by .= "1=4 <i ";
417 elsif ( $sort eq "title_za" ) {
418 $sort_by .= "1=4 >i ";
422 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
423 warn "WARNING sort $sort_by failed";
426 } # finished looping through servers
428 # The big moment: asynchronously retrieve results from all servers
429 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
430 my $ev = $zconns[ $i - 1 ]->last_event();
431 if ( $ev == ZOOM::Event::ZEND ) {
432 next unless $results[ $i - 1 ];
433 my $size = $results[ $i - 1 ]->size();
437 # loop through the results
438 $results_hash->{'hits'} = $size;
440 if ( $offset + $results_per_page <= $size ) {
441 $times = $offset + $results_per_page;
446 for ( my $j = $offset ; $j < $times ; $j++ ) {
451 ## Check if it's an index scan
453 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
455 # here we create a minimal MARC record and hand it off to the
456 # template just like a normal result ... perhaps not ideal, but
458 my $tmprecord = MARC::Record->new();
459 $tmprecord->encoding('UTF-8');
463 # the minimal record in author/title (depending on MARC flavour)
464 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
465 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
466 $tmprecord->append_fields($tmptitle);
468 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
469 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
470 $tmprecord->append_fields($tmptitle);
471 $tmprecord->append_fields($tmpauthor);
473 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
478 $record = $results[ $i - 1 ]->record($j)->raw();
480 # warn "RECORD $j:".$record;
481 $results_hash->{'RECORDS'}[$j] = $record;
483 # Fill the facets while we're looping, but only for the biblioserver
484 $facet_record = MARC::Record->new_from_usmarc($record)
485 if $servers[ $i - 1 ] =~ /biblioserver/;
487 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
489 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
491 if ( $facets->[$k] ) {
493 for my $tag ( @{ $facets->[$k]->{'tags'} } )
496 $facet_record->field($tag);
498 for my $field (@fields) {
499 my @subfields = $field->subfields();
500 for my $subfield (@subfields) {
501 my ( $code, $data ) = @$subfield;
503 $facets->[$k]->{'subfield'} )
505 $facets_counter->{ $facets->[$k]
511 $facets_info->{ $facets->[$k]
512 ->{'link_value'} }->{'label_value'} =
513 $facets->[$k]->{'label_value'};
514 $facets_info->{ $facets->[$k]
515 ->{'link_value'} }->{'expanded'} =
516 $facets->[$k]->{'expanded'};
522 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
525 # warn "connection ", $i-1, ": $size hits";
526 # warn $results[$i-1]->record(0)->render() if $size > 0;
529 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
531 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
532 keys %$facets_counter )
535 my $number_of_facets;
536 my @this_facets_array;
539 $facets_counter->{$link_value}
540 ->{$b} <=> $facets_counter->{$link_value}->{$a}
541 } keys %{ $facets_counter->{$link_value} }
545 if ( ( $number_of_facets < 6 )
546 || ( $expanded_facet eq $link_value )
547 || ( $facets_info->{$link_value}->{'expanded'} ) )
550 # Sanitize the link value ), ( will cause errors with CCL,
551 my $facet_link_value = $one_facet;
552 $facet_link_value =~ s/(\(|\))/ /g;
554 # fix the length that will display in the label,
555 my $facet_label_value = $one_facet;
557 substr( $one_facet, 0, 20 ) . "..."
558 unless length($facet_label_value) <= 20;
560 # if it's a branch, label by the name, not the code,
561 if ( $link_value =~ /branch/ ) {
563 $branches->{$one_facet}->{'branchname'};
566 # but we're down with the whole label being in the link's title.
567 my $facet_title_value = $one_facet;
569 push @this_facets_array,
573 $facets_counter->{$link_value}
575 facet_label_value => $facet_label_value,
576 facet_title_value => $facet_title_value,
577 facet_link_value => $facet_link_value,
578 type_link_value => $link_value,
584 # handle expanded option
585 unless ( $facets_info->{$link_value}->{'expanded'} ) {
587 if ( ( $number_of_facets > 6 )
588 && ( $expanded_facet ne $link_value ) );
593 type_link_value => $link_value,
594 type_id => $link_value . "_id",
596 $facets_info->{$link_value}->{'label_value'},
597 facets => \@this_facets_array,
598 expandable => $expandable,
599 expand => $link_value,
606 return ( undef, $results_hashref, \@facets_loop );
611 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
612 $results_per_page, $offset, $expanded_facet, $branches,
616 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
618 $paz->search($simple_query);
622 my $results_hashref = {};
623 my $stats = XMLin($paz->stat);
624 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
626 # for a grouped search result, the number of hits
627 # is the number of groups returned; 'bib_hits' will have
628 # the total number of bibs.
629 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
630 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
632 HIT: foreach my $hit (@{ $results->{'hit'} }) {
633 my $recid = $hit->{recid}->[0];
635 my $work_title = $hit->{'md-work-title'}->[0];
637 if (exists $hit->{'md-work-author'}) {
638 $work_author = $hit->{'md-work-author'}->[0];
640 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
642 my $result_group = {};
643 $result_group->{'group_label'} = $group_label;
644 $result_group->{'group_merge_key'} = $recid;
647 if (exists $hit->{count}) {
648 $count = $hit->{count}->[0];
650 $result_group->{'group_count'} = $count;
652 for (my $i = 0; $i < $count; $i++) {
653 # FIXME -- may need to worry about diacritics here
654 my $rec = $paz->record($recid, $i);
655 push @{ $result_group->{'RECORDS'} }, $rec;
658 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
661 # pass through facets
662 my $termlist_xml = $paz->termlist('author,subject');
663 my $terms = XMLin($termlist_xml, forcearray => 1);
664 my @facets_loop = ();
665 #die Dumper($results);
666 # foreach my $list (sort keys %{ $terms->{'list'} }) {
668 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
670 # facet_label_value => $facet->{'name'}->[0],
673 # push @facets_loop, ( {
674 # type_label => $list,
675 # facets => \@facets,
679 return ( undef, $results_hashref, \@facets_loop );
# _remove_stopwords( $operand, $index )
#
# Strip stopwords from a search operand.
#
#   $operand - the search string to clean up
#   $index   - the index qualifier(s) the operand is searched in; may be
#              undef or empty
#
# Returns a two-element list: the operand with every removed stopword
# replaced by a single space, and a reference to an array of the stopwords
# that were actually removed.  The candidate stopwords are the keys of the
# hash returned by C4::Context->stopwords.
sub _remove_stopwords {
    my ( $operand, $index ) = @_;
    my @stopwords_removed;

    # phrase and exact-qualified indexes shouldn't have stopwords removed
    my $qualifiers = defined $index ? $index : '';
    return ( $operand, \@stopwords_removed ) if $qualifiers =~ m/phr|ext/;

    # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
    # we use IsAlpha unicode definition, to deal correctly with diacritics.
    # otherwise, a French word like "leçon" would be split into "le" "çon", "le"
    # is a stopword, we'd get "çon" and wouldn't find anything...
    foreach my $stopword ( keys %{ C4::Context->stopwords } ) {

        # Don't remove operators.  The test is anchored so that stopwords
        # which merely contain an operator ("for", "nor", "another", ...)
        # are still removed, and case-insensitive so it agrees with the
        # case-insensitive removal below.
        next if $stopword =~ /\A(?:and|or|not)\z/i;

        # The stopword is data, not a pattern: escape regex metacharacters.
        my $word = quotemeta $stopword;

        # Detection uses /i as well, so a capitalised stopword in the
        # operand is found by the same rules that remove it.
        if ( $operand =~
            /(?:\P{IsAlpha}$word\P{IsAlpha}|^$word\P{IsAlpha}|\P{IsAlpha}$word$)/i )
        {
            $operand =~ s/\P{IsAlpha}$word\P{IsAlpha}/ /gi;
            $operand =~ s/^$word\P{IsAlpha}/ /gi;
            $operand =~ s/\P{IsAlpha}$word$/ /gi;
            push @stopwords_removed, $stopword;
        }
    }
    return ( $operand, \@stopwords_removed );
}
# _detect_truncation( $operand, $index )
#
# Sort the whitespace-separated words of $operand into buckets according to
# where a "*" wildcard appears.  The wildcard is stripped from words that go
# into one of the three truncation buckets.
#
#   $operand - the search string
#   $index   - accepted for interface compatibility; not used here
#
# Returns a list of five array references, in this order:
#   words without "*", words of the form "word*", words of the form "*word",
#   words of the form "*word*", and every other word containing "*"
#   (returned unchanged).
sub _detect_truncation {
    my ( $operand, $index ) = @_;
    my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
        @regexpr );

    # split ' ' skips leading whitespace and treats runs of whitespace as a
    # single separator, so no empty "words" are produced.  Splitting on a
    # single \s put an empty string into @nontruncated whenever the operand
    # had a leading or doubled space, making that bucket look non-empty.
    my @wordlist = defined $operand ? split( ' ', $operand ) : ();

    foreach my $word (@wordlist) {
        if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
            push @rightlefttruncated, $word;
        }
        elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
            push @lefttruncated, $word;
        }
        elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
            push @righttruncated, $word;
        }
        elsif ( index( $word, "*" ) < 0 ) {
            push @nontruncated, $word;
        }
        else {
            # "*" somewhere in the middle, or several of them
            push @regexpr, $word;
        }
    }
    return (
        \@nontruncated,       \@righttruncated, \@lefttruncated,
        \@rightlefttruncated, \@regexpr
    );
}
740 sub _build_stemmed_operand {
744 # FIXME: the locale should be set based on the user's language and/or search choice
745 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
747 # FIXME: these should be stored in the db so the librarian can modify the behavior
748 $stemmer->add_exceptions(
755 my @words = split( / /, $operand );
756 my $stems = $stemmer->stem(@words);
757 for my $stem (@$stems) {
758 $stemmed_operand .= "$stem";
759 $stemmed_operand .= "?"
760 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
761 $stemmed_operand .= " ";
763 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
764 return $stemmed_operand;
# _build_weighted_query( $operand, $stemmed_operand, $index )
#
# FIELD WEIGHTING - This is largely experimental stuff.  It works pretty well
# but could work much better if we had a smarter query parser.
#
# Wrap an operand in a ranked "(rk=( ... ))" expression.
#
#   $operand         - the search term(s)
#   $stemmed_operand - stemmed form of the operand; only used for keyword
#                      searches when the QueryStemming preference is on
#   $index           - index qualifier(s); false or 'kw' means keyword search
#
# Returns the weighted query string.
sub _build_weighted_query {
    my ( $operand, $stemmed_operand, $index ) = @_;
    my $stemming      = C4::Context->preference("QueryStemming") || 0;
    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")    || 0;

    # Every term below is wrapped in double quotes, so a quote inside the
    # operand would close the quoted term early and leave a malformed query.
    # Replace embedded quotes with spaces before building anything.
    for my $term ( $operand, $stemmed_operand ) {
        $term =~ s/"/ /g if defined $term;
    }

    my $weighted_query = "(rk=(";    # Specifies that we're applying rank

    # Keyword, or, no index specified
    # (the emptiness check comes first so an undefined index is never
    # string-compared)
    if ( !$index || $index eq 'kw' ) {
        $weighted_query .=
          "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
        $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
        $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
        $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
          if $fuzzy_enabled;    # add fuzzy, word list
        $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
          if ( $stemming and $stemmed_operand )
          ;                     # add stemming, right truncation
        $weighted_query .= " or wrdl,r9=\"$operand\"";
    }

    # Barcode searches should skip this process
    elsif ( $index eq 'bc' ) {
        $weighted_query .= "bc=\"$operand\"";
    }

    # Authority-number searches should skip this process
    elsif ( $index eq 'an' ) {
        $weighted_query .= "an=\"$operand\"";
    }

    # If the index already has more than one qualifier, wrap the operand
    # in quotes and pass it back (assumption is that the user knows what they
    # are doing and won't appreciate us mucking up their query
    elsif ( index( $index, ',' ) >= 0 ) {
        $weighted_query .= " $index=\"$operand\"";
    }

    #TODO: build better cases based on specific search indexes
    else {
        $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
        $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
        $weighted_query .=
          " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
    }

    $weighted_query .= "))";    # close rank specification
    return $weighted_query;
}
831 $simple_query, $query_cgi,
833 $limit_cgi, $limit_desc,
834 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
836 Build queries and limits in CCL, CGI, Human,
837 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
839 See verbose embedded documentation.
845 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
847 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
850 my @operators = @$operators if $operators;
851 my @indexes = @$indexes if $indexes;
852 my @operands = @$operands if $operands;
853 my @limits = @$limits if $limits;
854 my @sort_by = @$sort_by if $sort_by;
856 my $stemming = C4::Context->preference("QueryStemming") || 0;
857 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
858 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
859 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
860 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
862 # no stemming/weight/fuzzy in NoZebra
863 if ( C4::Context->preference("NoZebra") ) {
869 my $query = $operands[0];
870 my $simple_query = $operands[0];
872 # initialize the variables we're passing back
881 my $stopwords_removed; # flag to determine if stopwords have been removed
883 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
885 if ( $query =~ /^ccl=/ ) {
886 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
888 if ( $query =~ /^cql=/ ) {
889 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
891 if ( $query =~ /^pqf=/ ) {
892 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
895 # pass nested queries directly
896 # FIXME: need better handling of some of these variables in this case
897 if ( $query =~ /(\(|\))/ ) {
899 undef, $query, $simple_query, $query_cgi,
900 $query, $limit, $limit_cgi, $limit_desc,
901 $stopwords_removed, 'ccl'
905 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
906 # query operands and indexes and add stemming, truncation, field weighting, etc.
907 # Once we do so, we'll end up with a value in $query, just like if we had an
908 # incoming $query from the user
911 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
913 ; # a flag used to keep track if there was a previous query
914 # if there was, we can apply the current operator
916 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
918 # COMBINE OPERANDS, INDEXES AND OPERATORS
919 if ( $operands[$i] ) {
921 # A flag to determine whether or not to add the index to the query
924 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
925 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
928 $remove_stopwords = 0;
930 my $operand = $operands[$i];
931 my $index = $indexes[$i];
933 # Add index-specific attributes
934 # Date of Publication
935 if ( $index eq 'yr' ) {
936 $index .= ",st-numeric";
938 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
941 # Date of Acquisition
942 elsif ( $index eq 'acqdate' ) {
943 $index .= ",st-date-normalized";
945 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
947 # ISBN,ISSN,Standard Number, don't need special treatment
948 elsif ( $index eq 'nb' || $index eq 'ns' ) {
951 $stemming, $auto_truncation,
952 $weight_fields, $fuzzy_enabled,
954 ) = ( 0, 0, 0, 0, 0 );
957 # Set default structure attribute (word list)
959 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
960 $struct_attr = ",wrdl";
963 # Some helpful index variants
964 my $index_plus = $index . $struct_attr . ":" if $index;
965 my $index_plus_comma = $index . $struct_attr . "," if $index;
968 if ($remove_stopwords) {
969 ( $operand, $stopwords_removed ) =
970 _remove_stopwords( $operand, $index );
971 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
972 warn "REMOVED STOPWORDS: @$stopwords_removed"
973 if ( $stopwords_removed && $DEBUG );
977 my ( $nontruncated, $righttruncated, $lefttruncated,
978 $rightlefttruncated, $regexpr );
979 my $truncated_operand;
981 $nontruncated, $righttruncated, $lefttruncated,
982 $rightlefttruncated, $regexpr
983 ) = _detect_truncation( $operand, $index );
985 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
990 scalar(@$righttruncated) + scalar(@$lefttruncated) +
991 scalar(@$rightlefttruncated) > 0 )
994 # Don't field weight or add the index to the query, we do it here
996 undef $weight_fields;
997 my $previous_truncation_operand;
998 if ( scalar(@$nontruncated) > 0 ) {
999 $truncated_operand .= "$index_plus @$nontruncated ";
1000 $previous_truncation_operand = 1;
1002 if ( scalar(@$righttruncated) > 0 ) {
1003 $truncated_operand .= "and "
1004 if $previous_truncation_operand;
1005 $truncated_operand .=
1006 "$index_plus_comma" . "rtrn:@$righttruncated ";
1007 $previous_truncation_operand = 1;
1009 if ( scalar(@$lefttruncated) > 0 ) {
1010 $truncated_operand .= "and "
1011 if $previous_truncation_operand;
1012 $truncated_operand .=
1013 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1014 $previous_truncation_operand = 1;
1016 if ( scalar(@$rightlefttruncated) > 0 ) {
1017 $truncated_operand .= "and "
1018 if $previous_truncation_operand;
1019 $truncated_operand .=
1020 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1021 $previous_truncation_operand = 1;
1024 $operand = $truncated_operand if $truncated_operand;
1025 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1028 my $stemmed_operand;
1029 $stemmed_operand = _build_stemmed_operand($operand)
1031 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1033 # Handle Field Weighting
1034 my $weighted_operand;
1036 _build_weighted_query( $operand, $stemmed_operand, $index )
1038 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1039 $operand = $weighted_operand if $weight_fields;
1040 $indexes_set = 1 if $weight_fields;
1042 # If there's a previous operand, we need to add an operator
1043 if ($previous_operand) {
1045 # User-specified operator
1046 if ( $operators[ $i - 1 ] ) {
1047 $query .= " $operators[$i-1] ";
1048 $query .= " $index_plus " unless $indexes_set;
1049 $query .= " $operand";
1050 $query_cgi .= "&op=$operators[$i-1]";
1051 $query_cgi .= "&idx=$index" if $index;
1052 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1054 " $operators[$i-1] $index_plus $operands[$i]";
1057 # Default operator is and
1060 $query .= "$index_plus " unless $indexes_set;
1061 $query .= "$operand";
1062 $query_cgi .= "&op=and&idx=$index" if $index;
1063 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1064 $query_desc .= " and $index_plus $operands[$i]";
1068 # There isn't a previous operand, don't need an operator
1071 # Field-weighted queries already have indexes set
1072 $query .= " $index_plus " unless $indexes_set;
1074 $query_desc .= " $index_plus $operands[$i]";
1075 $query_cgi .= "&idx=$index" if $index;
1076 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1077 $previous_operand = 1;
1082 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1085 my $group_OR_limits;
1086 my $availability_limit;
1087 foreach my $this_limit (@limits) {
1088 if ( $this_limit =~ /available/ ) {
1090 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1092 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1093 $availability_limit .=
1094 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1095 $limit_cgi .= "&limit=available";
1099 # group_OR_limits, prefixed by mc-
1100 # OR every member of the group
1101 elsif ( $this_limit =~ /mc/ ) {
1102 $group_OR_limits .= " or " if $group_OR_limits;
1103 $limit_desc .= " or " if $group_OR_limits;
1104 $group_OR_limits .= "$this_limit";
1105 $limit_cgi .= "&limit=$this_limit";
1106 $limit_desc .= " $this_limit";
1109 # Regular old limits
1111 $limit .= " and " if $limit || $query;
1112 $limit .= "$this_limit";
1113 $limit_cgi .= "&limit=$this_limit";
1114 $limit_desc .= " $this_limit";
1117 if ($group_OR_limits) {
1118 $limit .= " and " if ( $query || $limit );
1119 $limit .= "($group_OR_limits)";
1121 if ($availability_limit) {
1122 $limit .= " and " if ( $query || $limit );
1123 $limit .= "($availability_limit)";
1126 # Normalize the query and limit strings
1129 for ( $query, $query_desc, $limit, $limit_desc ) {
1130 $_ =~ s/ / /g; # remove extra spaces
1131 $_ =~ s/^ //g; # remove any beginning spaces
1132 $_ =~ s/ $//g; # remove any ending spaces
1133 $_ =~ s/==/=/g; # remove double == from query
1135 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1137 for ($query_cgi,$simple_query) {
1140 # append the limit to the query
1141 $query .= " " . $limit;
1145 warn "QUERY:" . $query;
1146 warn "QUERY CGI:" . $query_cgi;
1147 warn "QUERY DESC:" . $query_desc;
1148 warn "LIMIT:" . $limit;
1149 warn "LIMIT CGI:" . $limit_cgi;
1150 warn "LIMIT DESC:" . $limit_desc;
1151 warn "---------\nLeave buildQuery\n---------";
1154 undef, $query, $simple_query, $query_cgi,
1155 $query_desc, $limit, $limit_cgi, $limit_desc,
1156 $stopwords_removed, $query_type
1160 =head2 searchResults
1162 Format results in a form suitable for passing to the template
1166 # IMO this subroutine is pretty messy still -- it's responsible for
1167 # building the HTML output for the template
1169 my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1170 my $dbh = C4::Context->dbh;
1174 # add search-term highlighting via <span>s on the search terms
1175 my $span_terms_hashref;
1176 for my $span_term ( split( / /, $searchdesc ) ) {
1177 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1178 $span_terms_hashref->{$span_term}++;
1181 #Build branchnames hash
1183 #get branch information.....
1186 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1187 ; # FIXME : use C4::Koha::GetBranches
1189 while ( my $bdata = $bsth->fetchrow_hashref ) {
1190 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1192 # FIXME - We build an authorised values hash here, using the default framework
1193 # though it is possible to have different authvals for different fws.
1195 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1197 # get notforloan authorised value list (see $shelflocations FIXME)
1198 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1200 #Build itemtype hash
1201 #find itemtype & itemtype image
1205 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1208 while ( my $bdata = $bsth->fetchrow_hashref ) {
1209 foreach (qw(description imageurl summary notforloan)) {
1210 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1214 #search item field code
1217 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1220 my ($itemtag) = $sth->fetchrow;
1222 ## find column names of items related to MARC
1223 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1225 my %subfieldstosearch;
1226 while ( ( my $column ) = $sth2->fetchrow ) {
1227 my ( $tagfield, $tagsubfield ) =
1228 &GetMarcFromKohaField( "items." . $column, "" );
1229 $subfieldstosearch{$column} = $tagsubfield;
1232 # handle which records to actually retrieve
1234 if ( $hits && $offset + $results_per_page <= $hits ) {
1235 $times = $offset + $results_per_page;
1238 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1241 # loop through all of the records we've retrieved
1242 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1243 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1244 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1245 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1246 $oldbiblio->{result_number} = $i + 1;
1248 # add imageurl to itemtype if there is one
1249 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1250 $oldbiblio->{imageurl} =
1251 $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1253 $oldbiblio->{imageurl} =
1254 getitemtypeimagesrc() . "/"
1255 . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1256 if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1258 my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) );
1259 $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images;
1260 my $aisbn = $oldbiblio->{'isbn'};
1261 $aisbn =~ /(\d*[X]*)/;
1262 $oldbiblio->{amazonisbn} = $1;
1263 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1264 # Build summary if there is one (the summary is defined in the itemtypes table)
1265 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1266 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1267 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1268 my @fields = $marcrecord->fields();
1269 foreach my $field (@fields) {
1270 my $tag = $field->tag();
1271 my $tagvalue = $field->as_string();
1273 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1274 unless ( $tag < 10 ) {
1275 my @subf = $field->subfields;
1276 for my $i ( 0 .. $#subf ) {
1277 my $subfieldcode = $subf[$i][0];
1278 my $subfieldvalue = $subf[$i][1];
1279 my $tagsubf = $tag . $subfieldcode;
1281 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1286 $summary =~ s/\[(.*?)]//g;
1287 $summary =~ s/\n/<br\/>/g;
1288 $oldbiblio->{summary} = $summary;
1291 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1292 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1293 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1294 # Add search-term highlighting to the whole record where they match using <span>s
1295 if (C4::Context->preference("OpacHighlightedWords")){
1296 my $searchhighlightblob;
1297 for my $highlight_field ( $marcrecord->fields ) {
1299 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1300 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1301 for my $subfield ($highlight_field->subfields()) {
1303 next if $subfield->[0] eq '9';
1304 my $field = $subfield->[1];
1305 for my $term ( keys %$span_terms_hashref ) {
1306 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1307 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1311 $searchhighlightblob .= $field . " ... " if $match;
1315 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1316 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1319 # Add search-term highlighting to the title, subtitle, etc. fields
1320 for my $term ( keys %$span_terms_hashref ) {
1321 my $old_term = $term;
1322 if ( length($term) > 3 ) {
1323 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1324 foreach(qw(title subtitle author publishercode place pages notes size)) {
1325 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1330 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1332 # Pull out the items fields
1333 my @fields = $marcrecord->field($itemtag);
1335 # Setting item statuses for display
1336 my @available_items_loop;
1337 my @onloan_items_loop;
1338 my @other_items_loop;
1340 my $available_items;
1344 my $ordered_count = 0;
1345 my $available_count = 0;
1346 my $onloan_count = 0;
1347 my $longoverdue_count = 0;
1348 my $other_count = 0;
1349 my $wthdrawn_count = 0;
1350 my $itemlost_count = 0;
1351 my $itembinding_count = 0;
1352 my $itemdamaged_count = 0;
1353 my $can_place_holds = 0;
1354 my $items_count = scalar(@fields);
1357 ( C4::Context->preference('maxItemsinSearchResults') )
1358 ? C4::Context->preference('maxItemsinSearchResults') - 1
1361 # loop through every item
1362 foreach my $field (@fields) {
1366 # populate the items hash
1367 foreach my $code ( keys %subfieldstosearch ) {
1368 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1370 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1371 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1372 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1373 if ($item->{$hbranch}) {
1374 $item->{'branchname'} = $branches{$item->{$hbranch}};
1376 elsif ($item->{$otherbranch}) { # Last resort
1377 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1380 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1381 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1382 if ( $item->{onloan} ) {
1384 my $key = $prefix . $item->{due_date};
1385 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1386 $onloan_items->{$key}->{count}++ if $item->{homebranch};
1387 $onloan_items->{$key}->{branchname} = $item->{branchname};
1388 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1389 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1390 $onloan_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1391 # if something's checked out and lost, mark it as 'long overdue'
1392 if ( $item->{itemlost} ) {
1393 $onloan_items->{$prefix}->{longoverdue}++;
1394 $longoverdue_count++;
1395 } else { # can place holds as long as item isn't lost
1396 $can_place_holds = 1;
1400 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1404 if ( $item->{notforloan} == -1 ) {
1408 # item is withdrawn, lost or damaged
1409 if ( $item->{wthdrawn}
1410 || $item->{itemlost}
1412 || $item->{notforloan} )
1414 $wthdrawn_count++ if $item->{wthdrawn};
1415 $itemlost_count++ if $item->{itemlost};
1416 $itemdamaged_count++ if $item->{damaged};
1417 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1420 my $key = $prefix . $item->{status};
1421 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1422 $other_items->{$key}->{$_} = $item->{$_};
1424 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1425 $other_items->{$key}->{count}++ if $item->{homebranch};
1426 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1427 $other_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1431 $can_place_holds = 1;
1433 $available_items->{$prefix}->{count}++ if $item->{homebranch};
1434 foreach (qw(branchname itemcallnumber)) {
1435 $available_items->{$prefix}->{$_} = $item->{$_};
1437 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1438 $available_items->{$prefix}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1441 } # notforloan, item level and biblioitem level
1442 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1444 ( C4::Context->preference('maxItemsinSearchResults') )
1445 ? C4::Context->preference('maxItemsinSearchResults') - 1
1447 for my $key ( sort keys %$onloan_items ) {
1448 (++$onloanitemscount > $maxitems) and last;
1449 push @onloan_items_loop, $onloan_items->{$key};
1451 for my $key ( sort keys %$other_items ) {
1452 (++$otheritemscount > $maxitems) and last;
1453 push @other_items_loop, $other_items->{$key};
1455 for my $key ( sort keys %$available_items ) {
1456 (++$availableitemscount > $maxitems) and last;
1457 push @available_items_loop, $available_items->{$key}
1460 # XSLT processing of some stuff
1461 if (C4::Context->preference("XSLTResultsDisplay") ) {
1462 my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1463 $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1466 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1467 $can_place_holds = 0
1468 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1469 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1470 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1471 $oldbiblio->{items_count} = $items_count;
1472 $oldbiblio->{available_items_loop} = \@available_items_loop;
1473 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1474 $oldbiblio->{other_items_loop} = \@other_items_loop;
1475 $oldbiblio->{availablecount} = $available_count;
1476 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1477 $oldbiblio->{onloancount} = $onloan_count;
1478 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1479 $oldbiblio->{othercount} = $other_count;
1480 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1481 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1482 $oldbiblio->{itemlostcount} = $itemlost_count;
1483 $oldbiblio->{damagedcount} = $itemdamaged_count;
1484 $oldbiblio->{orderedcount} = $ordered_count;
1485 $oldbiblio->{isbn} =~
1486 s/-//g; # deleting - in isbn to enable amazon content
1487 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) );
1488 push( @newresults, $oldbiblio );
1493 #----------------------------------------------------------------------
1495 # Non-Zebra GetRecords#
1496 #----------------------------------------------------------------------
1500 NZgetRecords has the same API as Zebra's getRecords, even if some parameters are not managed
# NZgetRecords - NoZebra counterpart of the Zebra getRecords entry point.
#
# Parameters (positional, same order as getRecords; only $query,
# $sort_by_ref, $results_per_page and $offset are actually used here):
#   $query            - CQL-like query string handed to NZanalyse
#   $simple_query     - ignored
#   $sort_by_ref      - array ref; its first element selects the ordering
#   $servers_ref      - ignored
#   $results_per_page - page size handed to NZorder
#   $offset           - index of the first result of the page
#   $expanded_facet, $branches and any further arguments - ignored
#
# Returns a three element list whose middle element is the structure built
# by NZorder.
# NOTE(review): the listing this sub was recovered from omits the lines
# around the NZorder call; the (undef, results, undef) triple is
# reconstructed from the trailing comma on that call and from the
# "same API as getRecords" statement -- confirm against the callers.
sub NZgetRecords {
    my (
        $query,            $simple_query, $sort_by_ref,    $servers_ref,
        $results_per_page, $offset,       $expanded_facet, $branches,
    ) = @_;

    warn "query =$query" if $DEBUG;
    my $result = NZanalyse($query);
    warn "results =$result" if $DEBUG;

    # Take the first requested ordering; tolerate a missing sort list
    # instead of dying on an undefined array reference.
    my $ordering = ref($sort_by_ref) eq 'ARRAY' ? $sort_by_ref->[0] : undef;

    return (
        undef,
        NZorder( $result, $ordering, $results_per_page, $offset ),
        undef
    );
}
1520 NZanalyse : takes a CQL string as parameter, and returns a list of biblionumber,title-weight;biblionumber,title-weight;...
1521 the list is built from an inverted index in the nozebra SQL table
1522 note that title is here only for convenience : the sorting will be very fast when requested on title
1523 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
# _NZcombine_results( $operator, $leftresult, $rightresult, $string )
#
# Private helper of NZanalyse: merges the result strings of the two sides of
# a boolean node.  $operator is the lower-cased operator including its
# surrounding blanks (' and ', ' or ', ' not ').  $string is only used in the
# error message.  Dies on an unknown operator.
sub _NZcombine_results {
    my ( $operator, $leftresult, $rightresult, $string ) = @_;

    return NZoperatorAND( $leftresult, $rightresult ) if $operator eq ' and ';
    return NZoperatorNOT( $leftresult, $rightresult ) if $operator eq ' not ';

    if ( $operator eq ' or ' ) {

        # union: just merge the 2 strings (a side without hits may be undef)
        return join '', grep { defined $_ } $leftresult, $rightresult;
    }

    # impossible because of the regexps isolating the operator, but just in case...
    die "error : operand unknown : $operator for $string";
}

# NZanalyse( $string, $server )
#
# Recursively evaluates a query against the nozebra inverted index.
#   $string - the query; may contain and/or/not, one leading parenthesised
#             group, one double-quoted phrase and index=value style leaves
#   $server - value of the nozebra.server column to search; defaults to
#             'biblioserver'
#
# Returns the concatenated nozebra.biblionumbers strings of the matching
# rows (entries separated by ';'), or a false value when nothing matches.
sub NZanalyse {
    my ( $string, $server ) = @_;
    warn " NZanalyse" if $DEBUG;

    # $server contains biblioserver or authorities, depending on what we search on.
    $server = 'biblioserver' unless $server;

    # If there is a quoted phrase, hide it temporarily so that any and/or/not
    # inside it is not taken for an operator.  Only capture when the
    # substitution really happened (an unbalanced quote used to pick up a
    # stale $1).
    my $commacontent;
    if ( $string =~ s/"(.*?)"/__X__/ ) {
        $commacontent = $1;
        warn "commacontent : $commacontent" if $DEBUG;
    }

    # Puts the hidden phrase back into (a part of) the query.
    my $restore_phrase = sub {
        my ($part) = @_;
        $part =~ s/__X__/"$commacontent"/
          if defined $part && defined $commacontent;
        return $part;
    };

    # delete repeated operators, they would send the recursion in an infinite loop
    1 while $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g;

    # process a leading parenthesised group first
    if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
        my $left     = $1;
        my $right    = $4;
        my $operator = defined $3 ? lc($3) : '';
        warn
"dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
          if $DEBUG;

        # the hidden phrase has to travel with the part it belongs to
        $left  = $restore_phrase->($left);
        $right = $restore_phrase->($right);

        my $leftresult = NZanalyse( $left, $server );
        return $leftresult unless $operator;

        my $rightresult = NZanalyse( $right, $server );
        return _NZcombine_results( $operator, $leftresult, $rightresult,
            $string );
    }
    warn "string :" . $string if $DEBUG;

    # split the query in 3 parts : X AND Y means $left="X", $operator=" and ", $right="Y"
    my ( $left, $operator, $right ) = ( '', '', '' );
    if ( $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/ ) {
        ( $left, $operator, $right ) = ( $1, lc($2), $3 );
    }
    warn "no parenthesis. left : $left operator: $operator right: $right"
      if $DEBUG;

    # it's not a leaf, we have a and/or/not : recurse on both sides
    if ($operator) {
        $left  = $restore_phrase->($left);
        $right = $restore_phrase->($right);
        warn "node : $left / $operator / $right\n" if $DEBUG;
        my $leftresult  = NZanalyse( $left,  $server );
        my $rightresult = NZanalyse( $right, $server );
        warn " leftresult : $leftresult"   if $DEBUG;
        warn " rightresult : $rightresult" if $DEBUG;
        return _NZcombine_results( $operator, $leftresult, $rightresult,
            $string );
    }

    # it's a leaf, do the real SQL query and return the result
    $string = $restore_phrase->($string);
    $string =~ s/[-.?,;!'()\[\]{}"&+*\/]/ /g;    # punctuation becomes a blank
    $string =~ s/^\s+//;                          # no blank at the beginning
    warn "leaf:$string" if $DEBUG;

    # parse the leaf in index / comparison operator / value
    # (two-character operators are tried first)
    my ( $index, $comparator, $value ) = ( $string, '', '' );
    if (   $string =~ /(.*)(>=|<=)(.*)/
        || $string =~ /(.*)(>|<|=)(.*)/ )
    {
        ( $index, $comparator, $value ) = ( $1, $2, $3 );
    }

    # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
    $index =~ s/ .*$//;

    # automatic replace for short index names
    my %long_name_for = (
        ti => 'title',
        au => 'author',
        pb => 'publisher',
        su => 'subject',
        an => 'koha-Auth-Number',
        kw => 'keyword',
    );
    $index = $long_name_for{$index} if exists $long_name_for{$index};
    warn "handling leaf... left:$index operator:$comparator right:$value"
      if $DEBUG;

    my $dbh = C4::Context->dbh;
    my $results;
    if ( $comparator && $index ne 'keyword' ) {

        # do a specific search.  $comparator can only be one of
        # >= <= > < = (captured above) or LIKE, so interpolating it is safe.
        $comparator = 'LIKE' if $comparator eq '=' and $value =~ /%/;
        my $sth = $dbh->prepare(
"SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $comparator ?"
        );
        warn "$index / $comparator / $value\n" if $DEBUG;

        # sanitizing the index name (both ends)
        $index =~ s/^\s+|\s+$//g;

        # split each word, query the DB and build the biblionumbers result
        foreach my $word ( split / /, $value ) {
            $word =~ s/^\s+|\s+$//g;
            next unless length $word;
            my $biblionumbers;
            warn "EXECUTE : $server, $index, $word" if $DEBUG;
            $sth->execute( $server, $index, $word )
              or warn "execute failed: " . $sth->errstr;
            while ( my ( $line, $found ) = $sth->fetchrow ) {

                # if we are dealing with a numeric value, use only numeric
                # results (in case of >=, <=, > or <), otherwise fill the result
                $biblionumbers .= $line
                  unless ( $value =~ /^\d+$/ && $found =~ /\D/ );
                warn "result : $found "
                  . ( $value =~ /\d/ ) . "=="
                  . ( $found =~ /\D/ ? $line : "" )
                  if $DEBUG;
            }

            # do a AND with existing list if there is one, otherwise, use the
            # biblionumbers list as 1st result list
            if ($results) {
                warn "NZAND" if $DEBUG;
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                $results = $biblionumbers;
            }
        }
    }
    else {

        # do a complete search (all indexes), if index='kw' do complete search too.
        my $sth = $dbh->prepare(
"SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
        );

        # split each word, query the DB and build the biblionumbers result
        foreach my $word ( split / /, $string ) {
            next if C4::Context->stopwords->{ uc($word) };    # skip if stopword
            next unless length $word;
            warn "search on all indexes on $word" if $DEBUG;
            my $biblionumbers;
            $sth->execute( $server, $word );
            while ( my $line = $sth->fetchrow ) {
                $biblionumbers .= $line;
            }

            # do a AND with existing list if there is one, otherwise, use the
            # biblionumbers list as 1st result list
            if ($results) {
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                warn "NEW RES for $word = $biblionumbers" if $DEBUG;
                $results = $biblionumbers;
            }
        }
    }
    warn "return : $results for LEAF : $string" if $DEBUG;
    return $results;
}
# NZoperatorAND( $rightresult, $leftresult )
#
# Intersects two nozebra result strings.  Each string is a list of
# "value-weight;" entries.  An entry of the second argument is kept when the
# same value is present in the first argument; it gets the smaller of the
# two weights.
#
# The result is stored twice, to have the same weight for AND than OR.
# example : TWO : 61-2;64-1;121-1; / TOWER : 61-1;64-1;130-1;
# result  : 61-1;61-1;64-1;64-1;
#
# Values are compared as whole strings: the former substring match reported
# "61,Title" as present in "161,Title-1;".
# Returns the resulting string, '' when nothing is common.
sub NZoperatorAND {
    my ( $rightresult, $leftresult ) = @_;
    $rightresult = '' unless defined $rightresult;
    $leftresult  = '' unless defined $leftresult;

    my @leftresult = split /;/, $leftresult;
    warn " @leftresult / $rightresult \n" if $DEBUG;

    # value => weight of its first occurrence in the first list
    my %weight_in_right;
    foreach my $entry ( split /;/, $rightresult ) {
        next unless $entry =~ /^(.*)-(\d+)$/;
        $weight_in_right{$1} = $2 unless exists $weight_in_right{$1};
    }

    my $finalresult = '';
    foreach my $entry (@leftresult) {
        my ( $value, $countvalue ) =
          $entry =~ /^(.*)-(\d+)$/ ? ( $1, $2 ) : ( $entry, undef );
        next unless exists $weight_in_right{$value};

        # keep the lowest of both weights
        my $other = $weight_in_right{$value};
        $countvalue = $other
          if !defined $countvalue || $other < $countvalue;
        $finalresult .= "$value-$countvalue;$value-$countvalue;";
    }
    warn "NZAND DONE : $finalresult \n" if $DEBUG;
    return $finalresult;
}
# NZoperatorOR( $rightresult, $leftresult )
#
# Union of two nozebra result strings: the second string is appended to the
# first one.  An undefined operand (a search without hits) counts as an
# empty list instead of raising an "uninitialized value" warning.
# Returns the concatenated string.
sub NZoperatorOR {
    my ( $rightresult, $leftresult ) = @_;
    return join '', grep { defined $_ } $rightresult, $leftresult;
}
# NZoperatorNOT( $leftresult, $rightresult )
#
# Removes from the first nozebra result string every entry whose value (the
# entry without its trailing "-weight") is also present in the second one.
# Entries are separated by ';'.
#
# Values are compared as whole, literal strings.  They used to be
# interpolated as an unanchored regular expression, so "1,T" was found in
# "21,T-3;" and titles containing regexp metacharacters never matched.
# Returns the remaining entries, each followed by ';' ('' when none is left).
sub NZoperatorNOT {
    my ( $leftresult, $rightresult ) = @_;
    $leftresult  = '' unless defined $leftresult;
    $rightresult = '' unless defined $rightresult;

    # values to exclude
    my %excluded;
    foreach my $entry ( split /;/, $rightresult ) {
        ( my $value = $entry ) =~ s/-\d+$//;
        $excluded{$value} = 1;
    }

    my $finalresult = '';
    foreach my $entry ( split /;/, $leftresult ) {
        ( my $value = $entry ) =~ s/-\d+$//;
        $finalresult .= "$entry;" unless $excluded{$value};
    }
    return $finalresult;
}
1804 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
# _NZorder_by_key( \%value_for, $descending )
#
# Private helper of NZorder: returns the values of the hash ordered by their
# keys, compared as strings, ascending or (when $descending is true)
# descending.
sub _NZorder_by_key {
    my ( $value_for, $descending ) = @_;
    my @keys = sort keys %$value_for;
    @keys = reverse @keys if $descending;
    return map { $value_for->{$_} } @keys;
}

# _NZorder_result( \@records, $hits )
#
# Private helper of NZorder: wraps an ordered record list in the same
# structure as GetRecords (Zebra querying).
sub _NZorder_result {
    my ( $records, $hits ) = @_;
    return { biblioserver => { RECORDS => $records, hits => $hits } };
}

# _NZorder_load_page( \@records, $results_per_page, $offset )
#
# Private helper of NZorder: for the requested page only, replaces the
# biblionumber stored in @records by the complete record in USMARC.
# Speed improvement : avoids reading too much things.  The upper bound is
# inclusive, as it has always been, but never runs past the end of the list
# (which used to be fatal when ordering by title).
sub _NZorder_load_page {
    my ( $records, $results_per_page, $offset ) = @_;
    my $last = $offset + $results_per_page;
    $last = $#{$records} if $last > $#{$records};
    for my $counter ( $offset .. $last ) {
        $records->[$counter] =
          GetMarcBiblio( $records->[$counter] )->as_usmarc;
    }
    return;
}

# NZorder( $biblionumbers, $ordering, $results_per_page, $offset )
#
# Orders the result of NZanalyse.
#   $biblionumbers    - "biblionumber,title-weight;" entries
#   $ordering         - the ordering name; anything that is not a
#                       popularity, author, call number, pubdate or title
#                       ordering means "by ranking"
#   $results_per_page - page size, 20 by default
#   $offset           - index of the first result of the page, 0 by default
#
# Returns { biblioserver => { RECORDS => [...], hits => N } }.  For the
# title and ranking orderings only the requested page of RECORDS holds
# USMARC records, the other elements are biblionumbers; the other orderings
# read every record.
sub NZorder {
    my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
    $biblionumbers = '' unless defined $biblionumbers;
    $ordering      = '' unless defined $ordering;
    warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;

    $results_per_page = 20 unless $results_per_page;
    $offset           = 0  unless $offset;

    # Split every entry in biblionumber / title / weight.  The title may
    # itself contain commas, so only the first comma is a separator.
    my @entries;
    foreach my $entry ( split /;/, $biblionumbers ) {
        my ( $biblionumber, $title ) = split /,/, $entry, 2;
        next unless defined $biblionumber && length $biblionumber;
        $title = '' unless defined $title;
        my $weight = $title =~ s/-(\d+)$// ? $1 : 0;
        push @entries, [ $biblionumber, $title, $weight ];
    }

    # In every ordering the sort key ends with the biblionumber, because many
    # biblios can share the same title/author/..., and we don't want to get
    # only 1 result for each of them.

    #
    # order by POPULARITY
    #
    if ( $ordering =~ /popularity/ ) {

        # popularity is not in the MARC record, it is built from a specific query
        my $dbh = C4::Context->dbh;
        my $sth =
          $dbh->prepare("select sum(issues) from items where biblionumber=?");
        my ( %record_for, %biblionumber_for );
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = @$entry;
            $record_for{$biblionumber} = GetMarcBiblio($biblionumber);
            $sth->execute($biblionumber);
            my $popularity = $sth->fetchrow || 0;

            # sub-ordering of equal popularities is done by title
            $biblionumber_for{ sprintf( "%10d", $popularity ) . $title
                  . $biblionumber } = $biblionumber;
        }
        my @records =
          map { $record_for{$_}->as_usmarc() }
          _NZorder_by_key( \%biblionumber_for,
            $ordering eq 'popularity_dsc' );
        return _NZorder_result( \@records, scalar @records );
    }

    #
    # ORDER BY author
    #
    elsif ( $ordering =~ /author/ ) {
        my $unimarc = C4::Context->preference('marcflavour') eq 'UNIMARC';
        my %record_for;
        foreach my $entry (@entries) {
            my $biblionumber = $entry->[0];
            my $record       = GetMarcBiblio($biblionumber);
            my $author =
              $unimarc
              ? ( $record->subfield( '200', 'f' )
                  || $record->subfield( '700', 'a' ) )
              : $record->subfield( '100', 'a' );
            $author = '' unless defined $author;
            $record_for{ $author . $biblionumber } = $record;
        }
        my @records =
          map { $_->as_usmarc() }
          _NZorder_by_key( \%record_for, $ordering eq 'author_za' );
        return _NZorder_result( \@records, scalar @records );
    }

    #
    # ORDER BY callnumber
    #
    elsif ( $ordering =~ /call_?number/ ) {

        # Sort on the field really mapped to the call number (the author
        # fields used to be read here).
        my ( $callnumber_tag, $callnumber_subfield ) =
          GetMarcFromKohaField( 'items.itemcallnumber', '' );
        ( $callnumber_tag, $callnumber_subfield ) =
          GetMarcFromKohaField( 'biblioitems.callnumber', '' )
          unless $callnumber_tag;
        my %record_for;
        foreach my $entry (@entries) {
            my $biblionumber = $entry->[0];
            my $record       = GetMarcBiblio($biblionumber);
            my $callnumber =
              $callnumber_tag
              ? $record->subfield( $callnumber_tag, $callnumber_subfield )
              : undef;
            $callnumber = '' unless defined $callnumber;
            $record_for{ $callnumber . $biblionumber } = $record;
        }
        my @records =
          map { $_->as_usmarc() }
          _NZorder_by_key( \%record_for, $ordering =~ /_dsc$/ );
        return _NZorder_result( \@records, scalar @records );
    }

    #
    # ORDER BY publication year
    #
    elsif ( $ordering =~ /pubdate/ ) {
        my ( $publicationyear_tag, $publicationyear_subfield ) =
          GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
        my %record_for;
        foreach my $entry (@entries) {
            my $biblionumber = $entry->[0];
            my $record       = GetMarcBiblio($biblionumber);
            my $publicationyear =
              $record->subfield( $publicationyear_tag,
                $publicationyear_subfield );
            $publicationyear = '' unless defined $publicationyear;
            $record_for{ $publicationyear . $biblionumber } = $record;
        }
        my @records =
          map { $_->as_usmarc() }
          _NZorder_by_key( \%record_for, $ordering eq 'pubdate_dsc' );
        return _NZorder_result( \@records, scalar @records );
    }

    #
    # ORDER BY title
    #
    elsif ( $ordering =~ /title/ ) {

        # the title is in the biblionumbers string, so we can order without
        # reading the records, and read them only for the requested page
        my %biblionumber_for;
        foreach my $entry (@entries) {
            my ( $biblionumber, $title ) = @$entry;
            $biblionumber_for{ $title . $biblionumber } = $biblionumber;
        }

        # 'title_az' is ascending, any other title ordering is descending
        my @records =
          _NZorder_by_key( \%biblionumber_for, $ordering ne 'title_az' );
        my $hits = scalar @records;
        _NZorder_load_page( \@records, $results_per_page, $offset );
        return _NZorder_result( \@records, $hits );
    }

    #
    # ORDER BY ranking (default)
    #
    else {

        # we need 2 hashes to order by ranking : the 1st one to count the
        # ranking, the 2nd to order by ranking.
        # note that we + the ranking because ranking is calculated on weight
        # of EACH term requested : if we ask for "two towers", and "two" has
        # weight 2 in biblio N, and "towers" has weight 4 in biblio N,
        # biblio N has ranking = 6
        my %ranking_for;
        foreach my $entry (@entries) {
            my ( $biblionumber, undef, $weight ) = @$entry;
            $ranking_for{$biblionumber} += $weight;
        }

        # build the result by "inverting" the ranking hash
        my %biblionumber_for =
          map { sprintf( "%10d", $ranking_for{$_} ) . '-' . $_ => $_ }
          keys %ranking_for;

        # best ranking first
        my @records = _NZorder_by_key( \%biblionumber_for, 1 );
        my $hits = scalar @records;
        _NZorder_load_page( \@records, $results_per_page, $offset );
        return _NZorder_result( \@records, $hits );
    }
}
2100 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2102 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2103 If the test parameter is set, the changes are not written to the records in the database.
2109 * $listbiblios is an array ref to marcrecords to be changed
2110 * $tagsubfield is the reference of the subfield to change.
2111 * $initvalue is the value to search the record for
2112 * $targetvalue is the value to set the subfield to
2113 * $test is to be set only not to perform changes in database.
2115 =item C<Output arg:>
2116 * $countchanged counts all the changes performed.
2117 * $listunchanged contains the list of all the biblionumbers of records unchanged.
2119 =item C<usage in the script:>
2123 my ($countchanged, $listunchanged) = ModBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);
2124 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2125 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
# ModBiblios( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test )
#
# Changes the value $initvalue of a field or subfield in a list of records.
#   $listbiblios - array ref of USMARC strings; an element that cannot be
#                  parsed as USMARC is taken for a biblionumber
#   $tagsubfield - tag (1 to 3 digits) optionally followed by a one
#                  character subfield code
#   $initvalue   - value to look for; it is used as a regular expression
#   $targetvalue - replacement value
#   $test        - when true, nothing is written to the database
#
# Returns ( $countmatched, \@unmatched ): the number of changes performed and
# the biblionumbers of the records left unchanged.  Returns ( 0, [] ) when
# $tagsubfield is not usable or designates the embedded item tag.
sub ModBiblios {
    my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
    my $countmatched = 0;
    my @unmatched;

    # Declared apart from the match: "my (...) = ... if ..." leaves the
    # variables in an undefined state when the condition is false.
    my ( $tag, $subfield );
    if ( defined $tagsubfield
        && $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ )
    {
        ( $tag, $subfield ) = ( $1, $2 );
    }
    unless ( defined $tag ) {
        warn "C4::Search::ModBiblios called with an unusable tag/subfield";
        return ( 0, [] );
    }

    # a short tag followed by a digit : the digit belongs to the tag
    if ( length($tag) < 3 && defined $subfield && $subfield =~ /^[0-9]$/ ) {
        $tag = $tag . $subfield;
        undef $subfield;
    }

    my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
    my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
    if ( defined $itemtag && $tag eq $itemtag ) {

        # do not allow the embedded item tag to be edited
        warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
        return ( 0, [] );
    }

    foreach my $usmarc (@$listbiblios) {
        my $record = eval { MARC::Record->new_from_usmarc($usmarc) };
        my $biblionumber;
        if ($@) {

            # usmarc is not a valid usmarc May be a biblionumber
            # FIXME - sorry, please let's figure out whether
            #         this function is to be passed a list of
            #         record numbers or a list of MARC::Record
            #         objects.  The former is probably better
            #         because the MARC records supplied by Zebra
            #         may be not current.
            $record       = GetMarcBiblio($usmarc);
            $biblionumber = $usmarc;
        }
        elsif ( $bntag >= 10 ) {    # decimal 10 : 010 is an octal literal (8)
            $biblionumber = $record->subfield( $bntag, $bnsubf );
        }
        else {
            $biblionumber = $record->field($bntag)->data;
        }

        # no record, or no such field in it : nothing to change
        unless ( $record && $record->field($tag) ) {
            push @unmatched, $biblionumber;
            next;
        }

        #GetBiblionumber is to be written.
        #Could be replaced by TransformMarcToKoha (But Would be longer)
        my $modify = 0;
        foreach my $field ( $record->field($tag) ) {
            if ( defined $subfield ) {

                # drop the matching subfield, then put the new value in
                if (
                    $field->delete_subfield(
                        'code'  => $subfield,
                        'match' => qr($initvalue)
                    )
                  )
                {
                    $countmatched++;
                    $modify = 1;
                    $field->update( $subfield, $targetvalue )
                      if defined $targetvalue && length $targetvalue;
                }
            }
            elsif ( $tag >= 10 ) {

                # whole data field : fields are deleted from the record
                # (MARC::Field has no delete_field method)
                if ( $record->delete_field($field) ) {
                    $countmatched++;
                    $modify = 1;
                }
            }
            elsif ( $field->data =~ qr($initvalue) ) {

                # control field : update() with a single value sets its data
                # NOTE(review): this case never set the modify flag, so the
                # record is not saved on its account -- confirm the intent.
                $field->update($targetvalue);
            }
        }

        if ($modify) {
            ModBiblio( $record, $biblionumber,
                GetFrameworkCode($biblionumber) )
              unless ($test);
        }
        else {
            push @unmatched, $biblionumber;
        }
    }
    return ( $countmatched, \@unmatched );
}
2222 END { } # module clean-up code here (global destructor)
2229 Koha Development team <info@koha.org>