package C4::Search; # This file is part of Koha. # # Koha is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # Koha is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, # Suite 330, Boston, MA 02111-1307 USA use strict; require Exporter; use C4::Context; use C4::Biblio; # GetMarcFromKohaField use C4::Koha; # getFacets use Lingua::Stem; use C4::Search::PazPar2; use XML::Simple; use C4::Dates qw(format_date); use C4::XSLT; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG); # set the version for version checking BEGIN { $VERSION = 3.01; $DEBUG = ($ENV{DEBUG}) ? 1 : 0; } =head1 NAME C4::Search - Functions for searching the Koha catalog. =head1 SYNOPSIS See opac/opac-search.pl or catalogue/search.pl for example of usage =head1 DESCRIPTION This module provides searching functions for Koha's bibliographic databases =head1 FUNCTIONS =cut @ISA = qw(Exporter); @EXPORT = qw( &findseealso &FindDuplicate &SimpleSearch &searchResults &getRecords &buildQuery &NZgetRecords &ModBiblios ); # make all your functions, whether exported or not; =head2 findseealso($dbh,$fields); C<$dbh> is a link to the DB handler. use C4::Context; my $dbh =C4::Context->dbh; C<$fields> is a reference to the fields array This function modifies the @$fields array and adds related fields to search on. FIXME: this function is probably deprecated in Koha 3 =cut sub findseealso { my ( $dbh, $fields ) = @_; my $tagslib = GetMarcStructure(1); for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) { my ($tag) = substr( @$fields[$i], 1, 3 ); my ($subfield) = substr( @$fields[$i], 4, 1 ); @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso} if ( $tagslib->{$tag}->{$subfield}->{seealso} ); } } =head2 FindDuplicate ($biblionumber,$biblionumber,$title) = FindDuplicate($record); This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm =cut sub FindDuplicate { my ($record) = @_; my $dbh = C4::Context->dbh; my $result = TransformMarcToKoha( $dbh, $record, '' ); my $sth; my $query; my $search; my $type; my ( $biblionumber, $title ); # search duplicate on ISBN, easy and fast.. # ... normalize first if ( $result->{isbn} ) { $result->{isbn} =~ s/\(.*$//; $result->{isbn} =~ s/\s+$//; $query = "isbn=$result->{isbn}"; } else { $result->{title} =~ s /\\//g; $result->{title} =~ s /\"//g; $result->{title} =~ s /\(//g; $result->{title} =~ s /\)//g; # FIXME: instead of removing operators, could just do # quotes around the value $result->{title} =~ s/(and|or|not)//g; $query = "ti,ext=$result->{title}"; $query .= " and itemtype=$result->{itemtype}" if ( $result->{itemtype} ); if ( $result->{author} ) { $result->{author} =~ s /\\//g; $result->{author} =~ s /\"//g; $result->{author} =~ s /\(//g; $result->{author} =~ s /\)//g; # remove valid operators $result->{author} =~ s/(and|or|not)//g; $query .= " and au,ext=$result->{author}"; } } # FIXME: add error handling my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded ! my @results; foreach my $possible_duplicate_record (@$searchresults) { my $marcrecord = MARC::Record->new_from_usmarc($possible_duplicate_record); my $result = TransformMarcToKoha( $dbh, $marcrecord, '' ); # FIXME :: why 2 $biblionumber ? if ($result) { push @results, $result->{'biblionumber'}; push @results, $result->{'title'}; } } return @results; } =head2 SimpleSearch ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] ); This function provides a simple search API on the bibliographic catalog =over 2 =item C * $query can be a simple keyword or a complete CCL query * @servers is optional. Defaults to biblioserver as found in koha-conf.xml * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef. =item C * $error is a empty unless an error is detected * \@results is an array of records. * $total_hits is the number of hits that would have been returned with no limit =item C =back my ( $error, $marcresults, $total_hits ) = SimpleSearch($query); if (defined $error) { $template->param(query_error => $error); warn "error: ".$error; output_html_with_http_headers $input, $cookie, $template->output; exit; } my $hits = scalar @$marcresults; my @results; for my $i (0..$hits) { my %resultsloop; my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]); my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,''); #build the hash for the template. $resultsloop{highlight} = ($i % 2)?(1):(0); $resultsloop{title} = $biblio->{'title'}; $resultsloop{subtitle} = $biblio->{'subtitle'}; $resultsloop{biblionumber} = $biblio->{'biblionumber'}; $resultsloop{author} = $biblio->{'author'}; $resultsloop{publishercode} = $biblio->{'publishercode'}; $resultsloop{publicationyear} = $biblio->{'publicationyear'}; push @results, \%resultsloop; } $template->param(result=>\@results); =cut sub SimpleSearch { my ( $query, $offset, $max_results, $servers ) = @_; if ( C4::Context->preference('NoZebra') ) { my $result = NZorder( NZanalyse($query) )->{'biblioserver'}; my $search_result = ( $result->{hits} && $result->{hits} > 0 ? $result->{'RECORDS'} : [] ); return ( undef, $search_result, scalar($search_result) ); } else { # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too. my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" ); my @results; my @zoom_queries; my @tmpresults; my @zconns; my $total_hits; return ( "No query entered", undef, undef ) unless $query; # Initialize & Search Zebra for ( my $i = 0 ; $i < @servers ; $i++ ) { eval { $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]); $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] ); # error handling my $error = $zconns[$i]->errmsg() . " (" . $zconns[$i]->errcode() . ") " . $zconns[$i]->addinfo() . " " . $zconns[$i]->diagset(); return ( $error, undef, undef ) if $zconns[$i]->errcode(); }; if ($@) { # caught a ZOOM::Exception my $error = $@->message() . " (" . $@->code() . ") " . $@->addinfo() . " " . $@->diagset(); warn $error; return ( $error, undef, undef ); } } while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) { my $event = $zconns[ $i - 1 ]->last_event(); if ( $event == ZOOM::Event::ZEND ) { my $first_record = defined( $offset ) ? $offset+1 : 1; my $hits = $tmpresults[ $i - 1 ]->size(); $total_hits += $hits; my $last_record = $hits; if ( defined $max_results && $offset + $max_results < $hits ) { $last_record = $offset + $max_results; } for my $j ( $first_record..$last_record ) { my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed push @results, $record; } } } foreach my $result (@tmpresults) { $result->destroy(); } foreach my $zoom_query (@zoom_queries) { $zoom_query->destroy(); } return ( undef, \@results, $total_hits ); } } =head2 getRecords ( undef, $results_hashref, \@facets_loop ) = getRecords ( $koha_query, $simple_query, $sort_by_ref, $servers_ref, $results_per_page, $offset, $expanded_facet, $branches, $query_type, $scan ); The all singing, all dancing, multi-server, asynchronous, scanning, searching, record nabbing, facet-building See verbse embedded documentation. =cut sub getRecords { my ( $koha_query, $simple_query, $sort_by_ref, $servers_ref, $results_per_page, $offset, $expanded_facet, $branches, $query_type, $scan ) = @_; my @servers = @$servers_ref; my @sort_by = @$sort_by_ref; # Initialize variables for the ZOOM connection and results object my $zconn; my @zconns; my @results; my $results_hashref = (); # Initialize variables for the faceted results objects my $facets_counter = (); my $facets_info = (); my $facets = getFacets(); my @facets_loop ; # stores the ref to array of hashes for template facets loop ### LOOP THROUGH THE SERVERS for ( my $i = 0 ; $i < @servers ; $i++ ) { $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); # perform the search, create the results objects # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query; #$query_to_use = $simple_query if $scan; warn $simple_query if ( $scan and $DEBUG ); # Check if we've got a query_type defined, if so, use it eval { if ($query_type) { if ( $query_type =~ /^ccl/ ) { $query_to_use =~ s/\:/\=/g; # change : to = last minute (FIXME) $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) ); } elsif ( $query_type =~ /^cql/ ) { $results[$i] = $zconns[$i]->search( new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) ); } elsif ( $query_type =~ /^pqf/ ) { $results[$i] = $zconns[$i]->search( new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) ); } } else { if ($scan) { $results[$i] = $zconns[$i]->scan( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) ); } else { $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) ); } } }; if ($@) { warn "WARNING: query problem with $query_to_use " . $@; } # Concatenate the sort_by limits and pass them to the results object # Note: sort will override rank my $sort_by; foreach my $sort (@sort_by) { if ( $sort eq "author_az" ) { $sort_by .= "1=1003 sort( "yaz", $sort_by ) < 0 ) { warn "WARNING sort $sort_by failed"; } } } # finished looping through servers # The big moment: asynchronously retrieve results from all servers while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) { my $ev = $zconns[ $i - 1 ]->last_event(); if ( $ev == ZOOM::Event::ZEND ) { next unless $results[ $i - 1 ]; my $size = $results[ $i - 1 ]->size(); if ( $size > 0 ) { my $results_hash; # loop through the results $results_hash->{'hits'} = $size; my $times; if ( $offset + $results_per_page <= $size ) { $times = $offset + $results_per_page; } else { $times = $size; } for ( my $j = $offset ; $j < $times ; $j++ ) { my $records_hash; my $record; my $facet_record; ## Check if it's an index scan if ($scan) { my ( $term, $occ ) = $results[ $i - 1 ]->term($j); # here we create a minimal MARC record and hand it off to the # template just like a normal result ... perhaps not ideal, but # it works for now my $tmprecord = MARC::Record->new(); $tmprecord->encoding('UTF-8'); my $tmptitle; my $tmpauthor; # the minimal record in author/title (depending on MARC flavour) if (C4::Context->preference("marcflavour") eq "UNIMARC") { $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ); $tmprecord->append_fields($tmptitle); } else { $tmptitle = MARC::Field->new('245',' ',' ', a => $term,); $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,); $tmprecord->append_fields($tmptitle); $tmprecord->append_fields($tmpauthor); } $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc(); } # not an index scan else { $record = $results[ $i - 1 ]->record($j)->raw(); # warn "RECORD $j:".$record; $results_hash->{'RECORDS'}[$j] = $record; # Fill the facets while we're looping, but only for the biblioserver $facet_record = MARC::Record->new_from_usmarc($record) if $servers[ $i - 1 ] =~ /biblioserver/; #warn $servers[$i-1]."\n".$record; #.$facet_record->title(); if ($facet_record) { for ( my $k = 0 ; $k <= @$facets ; $k++ ) { if ( $facets->[$k] ) { my @fields; for my $tag ( @{ $facets->[$k]->{'tags'} } ) { push @fields, $facet_record->field($tag); } for my $field (@fields) { my @subfields = $field->subfields(); for my $subfield (@subfields) { my ( $code, $data ) = @$subfield; if ( $code eq $facets->[$k]->{'subfield'} ) { $facets_counter->{ $facets->[$k] ->{'link_value'} } ->{$data}++; } } } $facets_info->{ $facets->[$k] ->{'link_value'} }->{'label_value'} = $facets->[$k]->{'label_value'}; $facets_info->{ $facets->[$k] ->{'link_value'} }->{'expanded'} = $facets->[$k]->{'expanded'}; } } } } } $results_hashref->{ $servers[ $i - 1 ] } = $results_hash; } # warn "connection ", $i-1, ": $size hits"; # warn $results[$i-1]->record(0)->render() if $size > 0; # BUILD FACETS if ( $servers[ $i - 1 ] =~ /biblioserver/ ) { for my $link_value ( sort { $facets_counter->{$b} <=> $facets_counter->{$a} } keys %$facets_counter ) { my $expandable; my $number_of_facets; my @this_facets_array; for my $one_facet ( sort { $facets_counter->{$link_value} ->{$b} <=> $facets_counter->{$link_value}->{$a} } keys %{ $facets_counter->{$link_value} } ) { $number_of_facets++; if ( ( $number_of_facets < 6 ) || ( $expanded_facet eq $link_value ) || ( $facets_info->{$link_value}->{'expanded'} ) ) { # Sanitize the link value ), ( will cause errors with CCL, my $facet_link_value = $one_facet; $facet_link_value =~ s/(\(|\))/ /g; # fix the length that will display in the label, my $facet_label_value = $one_facet; $facet_label_value = substr( $one_facet, 0, 20 ) . "..." unless length($facet_label_value) <= 20; # if it's a branch, label by the name, not the code, if ( $link_value =~ /branch/ ) { $facet_label_value = $branches->{$one_facet}->{'branchname'}; } # but we're down with the whole label being in the link's title. my $facet_title_value = $one_facet; push @this_facets_array, ( { facet_count => $facets_counter->{$link_value} ->{$one_facet}, facet_label_value => $facet_label_value, facet_title_value => $facet_title_value, facet_link_value => $facet_link_value, type_link_value => $link_value, }, ); } } # handle expanded option unless ( $facets_info->{$link_value}->{'expanded'} ) { $expandable = 1 if ( ( $number_of_facets > 6 ) && ( $expanded_facet ne $link_value ) ); } push @facets_loop, ( { type_link_value => $link_value, type_id => $link_value . "_id", "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1, facets => \@this_facets_array, expandable => $expandable, expand => $link_value, } ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) ); } } } } return ( undef, $results_hashref, \@facets_loop ); } sub pazGetRecords { my ( $koha_query, $simple_query, $sort_by_ref, $servers_ref, $results_per_page, $offset, $expanded_facet, $branches, $query_type, $scan ) = @_; my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url')); $paz->init(); $paz->search($simple_query); sleep 1; # do results my $results_hashref = {}; my $stats = XMLin($paz->stat); my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1); # for a grouped search result, the number of hits # is the number of groups returned; 'bib_hits' will have # the total number of bibs. $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0]; $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'}; HIT: foreach my $hit (@{ $results->{'hit'} }) { my $recid = $hit->{recid}->[0]; my $work_title = $hit->{'md-work-title'}->[0]; my $work_author; if (exists $hit->{'md-work-author'}) { $work_author = $hit->{'md-work-author'}->[0]; } my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title; my $result_group = {}; $result_group->{'group_label'} = $group_label; $result_group->{'group_merge_key'} = $recid; my $count = 1; if (exists $hit->{count}) { $count = $hit->{count}->[0]; } $result_group->{'group_count'} = $count; for (my $i = 0; $i < $count; $i++) { # FIXME -- may need to worry about diacritics here my $rec = $paz->record($recid, $i); push @{ $result_group->{'RECORDS'} }, $rec; } push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group; } # pass through facets my $termlist_xml = $paz->termlist('author,subject'); my $terms = XMLin($termlist_xml, forcearray => 1); my @facets_loop = (); #die Dumper($results); # foreach my $list (sort keys %{ $terms->{'list'} }) { # my @facets = (); # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) { # push @facets, { # facet_label_value => $facet->{'name'}->[0], # }; # } # push @facets_loop, ( { # type_label => $list, # facets => \@facets, # } ); # } return ( undef, $results_hashref, \@facets_loop ); } # STOPWORDS sub _remove_stopwords { my ( $operand, $index ) = @_; my @stopwords_removed; # phrase and exact-qualified indexes shouldn't have stopwords removed if ( $index !~ m/phr|ext/ ) { # remove stopwords from operand : parse all stopwords & remove them (case insensitive) # we use IsAlpha unicode definition, to deal correctly with diacritics. # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le" # is a stopword, we'd get "çon" and wouldn't find anything... foreach ( keys %{ C4::Context->stopwords } ) { next if ( $_ =~ /(and|or|not)/ ); # don't remove operators if ( $operand =~ /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ ) { $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi; $operand =~ s/^$_\P{IsAlpha}/ /gi; $operand =~ s/\P{IsAlpha}$_$/ /gi; $operand =~ s/$1//gi; push @stopwords_removed, $_; } } } return ( $operand, \@stopwords_removed ); } # TRUNCATION sub _detect_truncation { my ( $operand, $index ) = @_; my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated, @regexpr ); $operand =~ s/^ //g; my @wordlist = split( /\s/, $operand ); foreach my $word (@wordlist) { if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) { push @rightlefttruncated, $word; } elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) { push @lefttruncated, $word; } elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) { push @righttruncated, $word; } elsif ( index( $word, "*" ) < 0 ) { push @nontruncated, $word; } else { push @regexpr, $word; } } return ( \@nontruncated, \@righttruncated, \@lefttruncated, \@rightlefttruncated, \@regexpr ); } # STEMMING sub _build_stemmed_operand { my ($operand) = @_; my $stemmed_operand; # If operand contains a digit, it is almost certainly an identifier, and should # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which # can contain the letter "X" - for example, _build_stemmend_operand would reduce # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098. return $operand if $operand =~ /\d/; # FIXME: the locale should be set based on the user's language and/or search choice my $stemmer = Lingua::Stem->new( -locale => 'EN-US' ); # FIXME: these should be stored in the db so the librarian can modify the behavior $stemmer->add_exceptions( { 'and' => 'and', 'or' => 'or', 'not' => 'not', } ); my @words = split( / /, $operand ); my $stems = $stemmer->stem(@words); for my $stem (@$stems) { $stemmed_operand .= "$stem"; $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 ); $stemmed_operand .= " "; } warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG; return $stemmed_operand; } # FIELD WEIGHTING sub _build_weighted_query { # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works # pretty well but could work much better if we had a smarter query parser my ( $operand, $stemmed_operand, $index ) = @_; my $stemming = C4::Context->preference("QueryStemming") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; my $weighted_query .= "(rk=("; # Specifies that we're applying rank # Keyword, or, no index specified if ( ( $index eq 'kw' ) || ( !$index ) ) { $weighted_query .= "Title-cover,ext,r1=\"$operand\""; # exact title-cover $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title #$weighted_query .= " or any,ext,r4=$operand"; # exact any #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\"" if ( $stemming and $stemmed_operand ) ; # add stemming, right truncation $weighted_query .= " or wrdl,r9=\"$operand\""; # embedded sorting: 0 a-z; 1 z-a # $weighted_query .= ") or (sort1,aut=1"; } # Barcode searches should skip this process elsif ( $index eq 'bc' ) { $weighted_query .= "bc=\"$operand\""; } # Authority-number searches should skip this process elsif ( $index eq 'an' ) { $weighted_query .= "an=\"$operand\""; } # If the index already has more than one qualifier, wrap the operand # in quotes and pass it back (assumption is that the user knows what they # are doing and won't appreciate us mucking up their query elsif ( $index =~ ',' ) { $weighted_query .= " $index=\"$operand\""; } #TODO: build better cases based on specific search indexes else { $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)"; $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index $weighted_query .= " or $index,rt,wrdl,r3=\"$operand\""; # word list index } $weighted_query .= "))"; # close rank specification return $weighted_query; } =head2 buildQuery ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan); Build queries and limits in CCL, CGI, Human, handle truncation, stemming, field weighting, stopwords, fuzziness, etc. See verbose embedded documentation. =cut sub buildQuery { my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_; warn "---------\nEnter buildQuery\n---------" if $DEBUG; # dereference my @operators = @$operators if $operators; my @indexes = @$indexes if $indexes; my @operands = @$operands if $operands; my @limits = @$limits if $limits; my @sort_by = @$sort_by if $sort_by; my $stemming = C4::Context->preference("QueryStemming") || 0; my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0; # no stemming/weight/fuzzy in NoZebra if ( C4::Context->preference("NoZebra") ) { $stemming = 0; $weight_fields = 0; $fuzzy_enabled = 0; } my $query = $operands[0]; my $simple_query = $operands[0]; # initialize the variables we're passing back my $query_cgi; my $query_desc; my $query_type; my $limit; my $limit_cgi; my $limit_desc; my $stopwords_removed; # flag to determine if stopwords have been removed # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps # DIAGNOSTIC ONLY!! if ( $query =~ /^ccl=/ ) { return ( undef, $', $', $', $', '', '', '', '', 'ccl' ); } if ( $query =~ /^cql=/ ) { return ( undef, $', $', $', $', '', '', '', '', 'cql' ); } if ( $query =~ /^pqf=/ ) { return ( undef, $', $', $', $', '', '', '', '', 'pqf' ); } # pass nested queries directly # FIXME: need better handling of some of these variables in this case if ( $query =~ /(\(|\))/ ) { return ( undef, $query, $simple_query, $query_cgi, $query, $limit, $limit_cgi, $limit_desc, $stopwords_removed, 'ccl' ); } # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming # query operands and indexes and add stemming, truncation, field weighting, etc. # Once we do so, we'll end up with a value in $query, just like if we had an # incoming $query from the user else { $query = "" ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query my $previous_operand ; # a flag used to keep track if there was a previous query # if there was, we can apply the current operator # for every operand for ( my $i = 0 ; $i <= @operands ; $i++ ) { # COMBINE OPERANDS, INDEXES AND OPERATORS if ( $operands[$i] ) { # A flag to determine whether or not to add the index to the query my $indexes_set; # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling if ( $operands[$i] =~ /(:|=)/ || $scan ) { $weight_fields = 0; $stemming = 0; $remove_stopwords = 0; } my $operand = $operands[$i]; my $index = $indexes[$i]; # Add index-specific attributes # Date of Publication if ( $index eq 'yr' ) { $index .= ",st-numeric"; $indexes_set++; $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; } # Date of Acquisition elsif ( $index eq 'acqdate' ) { $index .= ",st-date-normalized"; $indexes_set++; $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; } # ISBN,ISSN,Standard Number, don't need special treatment elsif ( $index eq 'nb' || $index eq 'ns' ) { $indexes_set++; ( $stemming, $auto_truncation, $weight_fields, $fuzzy_enabled, $remove_stopwords ) = ( 0, 0, 0, 0, 0 ); } # Set default structure attribute (word list) my $struct_attr; unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) { $struct_attr = ",wrdl"; } # Some helpful index variants my $index_plus = $index . $struct_attr . ":" if $index; my $index_plus_comma = $index . $struct_attr . "," if $index; # Remove Stopwords if ($remove_stopwords) { ( $operand, $stopwords_removed ) = _remove_stopwords( $operand, $index ); warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG; warn "REMOVED STOPWORDS: @$stopwords_removed" if ( $stopwords_removed && $DEBUG ); } # Detect Truncation my ( $nontruncated, $righttruncated, $lefttruncated, $rightlefttruncated, $regexpr ); my $truncated_operand; ( $nontruncated, $righttruncated, $lefttruncated, $rightlefttruncated, $regexpr ) = _detect_truncation( $operand, $index ); warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG; # Apply Truncation if ( scalar(@$righttruncated) + scalar(@$lefttruncated) + scalar(@$rightlefttruncated) > 0 ) { # Don't field weight or add the index to the query, we do it here $indexes_set = 1; undef $weight_fields; my $previous_truncation_operand; if ( scalar(@$nontruncated) > 0 ) { $truncated_operand .= "$index_plus @$nontruncated "; $previous_truncation_operand = 1; } if ( scalar(@$righttruncated) > 0 ) { $truncated_operand .= "and " if $previous_truncation_operand; $truncated_operand .= "$index_plus_comma" . "rtrn:@$righttruncated "; $previous_truncation_operand = 1; } if ( scalar(@$lefttruncated) > 0 ) { $truncated_operand .= "and " if $previous_truncation_operand; $truncated_operand .= "$index_plus_comma" . "ltrn:@$lefttruncated "; $previous_truncation_operand = 1; } if ( scalar(@$rightlefttruncated) > 0 ) { $truncated_operand .= "and " if $previous_truncation_operand; $truncated_operand .= "$index_plus_comma" . "rltrn:@$rightlefttruncated "; $previous_truncation_operand = 1; } } $operand = $truncated_operand if $truncated_operand; warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG; # Handle Stemming my $stemmed_operand; $stemmed_operand = _build_stemmed_operand($operand) if $stemming; warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG; # Handle Field Weighting my $weighted_operand; $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index ) if $weight_fields; warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG; $operand = $weighted_operand if $weight_fields; $indexes_set = 1 if $weight_fields; # If there's a previous operand, we need to add an operator if ($previous_operand) { # User-specified operator if ( $operators[ $i - 1 ] ) { $query .= " $operators[$i-1] "; $query .= " $index_plus " unless $indexes_set; $query .= " $operand"; $query_cgi .= "&op=$operators[$i-1]"; $query_cgi .= "&idx=$index" if $index; $query_cgi .= "&q=$operands[$i]" if $operands[$i]; $query_desc .= " $operators[$i-1] $index_plus $operands[$i]"; } # Default operator is and else { $query .= " and "; $query .= "$index_plus " unless $indexes_set; $query .= "$operand"; $query_cgi .= "&op=and&idx=$index" if $index; $query_cgi .= "&q=$operands[$i]" if $operands[$i]; $query_desc .= " and $index_plus $operands[$i]"; } } # There isn't a pervious operand, don't need an operator else { # Field-weighted queries already have indexes set $query .= " $index_plus " unless $indexes_set; $query .= $operand; $query_desc .= " $index_plus $operands[$i]"; $query_cgi .= "&idx=$index" if $index; $query_cgi .= "&q=$operands[$i]" if $operands[$i]; $previous_operand = 1; } } #/if $operands } # /for } warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG; # add limits my $group_OR_limits; my $availability_limit; foreach my $this_limit (@limits) { if ( $this_limit =~ /available/ ) { # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0) # In English: # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0 $availability_limit .= "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )"; $limit_cgi .= "&limit=available"; $limit_desc .= ""; } # group_OR_limits, prefixed by mc- # OR every member of the group elsif ( $this_limit =~ /mc/ ) { $group_OR_limits .= " or " if $group_OR_limits; $limit_desc .= " or " if $group_OR_limits; $group_OR_limits .= "$this_limit"; $limit_cgi .= "&limit=$this_limit"; $limit_desc .= " $this_limit"; } # Regular old limits else { $limit .= " and " if $limit || $query; $limit .= "$this_limit"; $limit_cgi .= "&limit=$this_limit"; $limit_desc .= " $this_limit"; } } if ($group_OR_limits) { $limit .= " and " if ( $query || $limit ); $limit .= "($group_OR_limits)"; } if ($availability_limit) { $limit .= " and " if ( $query || $limit ); $limit .= "($availability_limit)"; } # Normalize the query and limit strings $query =~ s/:/=/g; $limit =~ s/:/=/g; for ( $query, $query_desc, $limit, $limit_desc ) { $_ =~ s/ / /g; # remove extra spaces $_ =~ s/^ //g; # remove any beginning spaces $_ =~ s/ $//g; # remove any ending spaces $_ =~ s/==/=/g; # remove double == from query } $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi for ($query_cgi,$simple_query) { $_ =~ s/"//g; } # append the limit to the query $query .= " " . $limit; # Warnings if DEBUG if ($DEBUG) { warn "QUERY:" . $query; warn "QUERY CGI:" . $query_cgi; warn "QUERY DESC:" . $query_desc; warn "LIMIT:" . $limit; warn "LIMIT CGI:" . $limit_cgi; warn "LIMIT DESC:" . $limit_desc; warn "---------\nLeave buildQuery\n---------"; } return ( undef, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, $stopwords_removed, $query_type ); } =head2 searchResults Format results in a form suitable for passing to the template =cut # IMO this subroutine is pretty messy still -- it's responsible for # building the HTML output for the template sub searchResults { my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_; my $dbh = C4::Context->dbh; my $even = 1; my @newresults; # add search-term highlighting via s on the search terms my $span_terms_hashref; for my $span_term ( split( / /, $searchdesc ) ) { $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g; $span_terms_hashref->{$span_term}++; } #Build branchnames hash #find branchname #get branch information..... my %branches; my $bsth = $dbh->prepare("SELECT branchcode,branchname FROM branches") ; # FIXME : use C4::Koha::GetBranches $bsth->execute(); while ( my $bdata = $bsth->fetchrow_hashref ) { $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'}; } # FIXME - We build an authorised values hash here, using the default framework # though it is possible to have different authvals for different fws. my $shelflocations =GetKohaAuthorisedValues('items.location',''); # get notforloan authorised value list (see $shelflocations FIXME) my $notforloan_authorised_value = GetAuthValCode('items.notforloan',''); #Build itemtype hash #find itemtype & itemtype image my %itemtypes; $bsth = $dbh->prepare( "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes" ); $bsth->execute(); while ( my $bdata = $bsth->fetchrow_hashref ) { foreach (qw(description imageurl summary notforloan)) { $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_}; } } #search item field code my $sth = $dbh->prepare( "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'" ); $sth->execute; my ($itemtag) = $sth->fetchrow; ## find column names of items related to MARC my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items"); $sth2->execute; my %subfieldstosearch; while ( ( my $column ) = $sth2->fetchrow ) { my ( $tagfield, $tagsubfield ) = &GetMarcFromKohaField( "items." . $column, "" ); $subfieldstosearch{$column} = $tagsubfield; } # handle which records to actually retrieve my $times; if ( $hits && $offset + $results_per_page <= $hits ) { $times = $offset + $results_per_page; } else { $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it? } # loop through all of the records we've retrieved for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) { my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] ); my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' ); $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, ''); $oldbiblio->{result_number} = $i + 1; # add imageurl to itemtype if there is one $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} ); my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) ); $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images; my $aisbn = $oldbiblio->{'isbn'}; $aisbn =~ /(\d*[X]*)/; $oldbiblio->{amazonisbn} = $1; $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description}; # Build summary if there is one (the summary is defined in the itemtypes table) # FIXME: is this used anywhere, I think it can be commented out? -- JF if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) { my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary}; my @fields = $marcrecord->fields(); foreach my $field (@fields) { my $tag = $field->tag(); my $tagvalue = $field->as_string(); $summary =~ s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g; unless ( $tag < 10 ) { my @subf = $field->subfields; for my $i ( 0 .. $#subf ) { my $subfieldcode = $subf[$i][0]; my $subfieldvalue = $subf[$i][1]; my $tagsubf = $tag . $subfieldcode; $summary =~ s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g; } } } # FIXME: yuk $summary =~ s/\[(.*?)]//g; $summary =~ s/\n//g; $oldbiblio->{summary} = $summary; } # save an author with no tag, for the > link $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'}; $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'}; # Add search-term highlighting to the whole record where they match using s if (C4::Context->preference("OpacHighlightedWords")){ my $searchhighlightblob; for my $highlight_field ( $marcrecord->fields ) { # FIXME: need to skip title, subtitle, author, etc., as they are handled below next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields for my $subfield ($highlight_field->subfields()) { my $match; next if $subfield->[0] eq '9'; my $field = $subfield->[1]; for my $term ( keys %$span_terms_hashref ) { if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) { $field =~ s/$term/$&<\/span>/gi; $match++; } } $searchhighlightblob .= $field . " ... " if $match; } } $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob; $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob; } # Add search-term highlighting to the title, subtitle, etc. fields for my $term ( keys %$span_terms_hashref ) { my $old_term = $term; if ( length($term) > 3 ) { $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g; foreach(qw(title subtitle author publishercode place pages notes size)) { $oldbiblio->{$_} =~ s/$term/$&<\/span>/gi; } } } ($i % 2) and $oldbiblio->{'toggle'} = 1; # Pull out the items fields my @fields = $marcrecord->field($itemtag); # Setting item statuses for display my @available_items_loop; my @onloan_items_loop; my @other_items_loop; my $available_items; my $onloan_items; my $other_items; my $ordered_count = 0; my $available_count = 0; my $onloan_count = 0; my $longoverdue_count = 0; my $other_count = 0; my $wthdrawn_count = 0; my $itemlost_count = 0; my $itembinding_count = 0; my $itemdamaged_count = 0; my $item_in_transit_count = 0; my $can_place_holds = 0; my $items_count = scalar(@fields); my $items_counter; my $maxitems = ( C4::Context->preference('maxItemsinSearchResults') ) ? C4::Context->preference('maxItemsinSearchResults') - 1 : 1; # loop through every item foreach my $field (@fields) { my $item; $items_counter++; # populate the items hash foreach my $code ( keys %subfieldstosearch ) { $item->{$code} = $field->subfield( $subfieldstosearch{$code} ); } my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch'; my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch'; # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one if ($item->{$hbranch}) { $item->{'branchname'} = $branches{$item->{$hbranch}}; } elsif ($item->{$otherbranch}) { # Last resort $item->{'branchname'} = $branches{$item->{$otherbranch}}; } my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber}; # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item if ( $item->{onloan} ) { $onloan_count++; my $key = $prefix . $item->{due_date}; $onloan_items->{$key}->{due_date} = format_date($item->{onloan}); $onloan_items->{$key}->{count}++ if $item->{homebranch}; $onloan_items->{$key}->{branchname} = $item->{branchname}; $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} }; $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber}; $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} ); # if something's checked out and lost, mark it as 'long overdue' if ( $item->{itemlost} ) { $onloan_items->{$prefix}->{longoverdue}++; $longoverdue_count++; } else { # can place holds as long as item isn't lost $can_place_holds = 1; } } # items not on loan, but still unavailable ( lost, withdrawn, damaged ) else { # item is on order if ( $item->{notforloan} == -1 ) { $ordered_count++; } # is item in transit? my $transfertwhen = ''; my ($transfertfrom, $transfertto); unless ($item->{wthdrawn} || $item->{itemlost} || $item->{damaged} || $item->{notforloan} || $items_count > 20) { # A couple heuristics to limit how many times # we query the database for item transfer information, sacrificing # accuracy in some cases for speed; # # 1. don't query if item has one of the other statuses # 2. don't check transit status if the bib has # more than 20 items # # FIXME: to avoid having the query the database like this, and to make # the in transit status count as unavailable for search limiting, # should map transit status to record indexed in Zebra. # ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber}); } # item is withdrawn, lost or damaged if ( $item->{wthdrawn} || $item->{itemlost} || $item->{damaged} || $item->{notforloan} || ($transfertwhen ne '')) { $wthdrawn_count++ if $item->{wthdrawn}; $itemlost_count++ if $item->{itemlost}; $itemdamaged_count++ if $item->{damaged}; $item_in_transit_count++ if $transfertwhen ne ''; $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan}; $other_count++; my $key = $prefix . $item->{status}; foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) { $other_items->{$key}->{$_} = $item->{$_}; } $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0; $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value; $other_items->{$key}->{count}++ if $item->{homebranch}; $other_items->{$key}->{location} = $shelflocations->{ $item->{location} }; $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} ); } # item is available else { $can_place_holds = 1; $available_count++; $available_items->{$prefix}->{count}++ if $item->{homebranch}; foreach (qw(branchname itemcallnumber)) { $available_items->{$prefix}->{$_} = $item->{$_}; } $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} }; $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} ); } } } # notforloan, item level and biblioitem level my ( $availableitemscount, $onloanitemscount, $otheritemscount ); $maxitems = ( C4::Context->preference('maxItemsinSearchResults') ) ? C4::Context->preference('maxItemsinSearchResults') - 1 : 1; for my $key ( sort keys %$onloan_items ) { (++$onloanitemscount > $maxitems) and last; push @onloan_items_loop, $onloan_items->{$key}; } for my $key ( sort keys %$other_items ) { (++$otheritemscount > $maxitems) and last; push @other_items_loop, $other_items->{$key}; } for my $key ( sort keys %$available_items ) { (++$availableitemscount > $maxitems) and last; push @available_items_loop, $available_items->{$key} } # XSLT processing of some stuff if (C4::Context->preference("XSLTResultsDisplay") && !$scan) { my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl"); $oldbiblio->{XSLTResultsRecord} = $newxmlrecord; } # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan}; $oldbiblio->{norequests} = 1 unless $can_place_holds; $oldbiblio->{itemsplural} = 1 if $items_count > 1; $oldbiblio->{items_count} = $items_count; $oldbiblio->{available_items_loop} = \@available_items_loop; $oldbiblio->{onloan_items_loop} = \@onloan_items_loop; $oldbiblio->{other_items_loop} = \@other_items_loop; $oldbiblio->{availablecount} = $available_count; $oldbiblio->{availableplural} = 1 if $available_count > 1; $oldbiblio->{onloancount} = $onloan_count; $oldbiblio->{onloanplural} = 1 if $onloan_count > 1; $oldbiblio->{othercount} = $other_count; $oldbiblio->{otherplural} = 1 if $other_count > 1; $oldbiblio->{wthdrawncount} = $wthdrawn_count; $oldbiblio->{itemlostcount} = $itemlost_count; $oldbiblio->{damagedcount} = $itemdamaged_count; $oldbiblio->{intransitcount} = $item_in_transit_count; $oldbiblio->{orderedcount} = $ordered_count; $oldbiblio->{isbn} =~ s/-//g; # deleting - in isbn to enable amazon content $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) ); push( @newresults, $oldbiblio ); } return @newresults; } #---------------------------------------------------------------------- # # Non-Zebra GetRecords# #---------------------------------------------------------------------- =head2 NZgetRecords NZgetRecords has the same API as zera getRecords, even if some parameters are not managed =cut sub NZgetRecords { my ( $query, $simple_query, $sort_by_ref, $servers_ref, $results_per_page, $offset, $expanded_facet, $branches, $query_type, $scan ) = @_; warn "query =$query" if $DEBUG; my $result = NZanalyse($query); warn "results =$result" if $DEBUG; return ( undef, NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ), undef ); } =head2 NZanalyse NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,... the list is built from an inverted index in the nozebra SQL table note that title is here only for convenience : the sorting will be very fast when requested on title if the sorting is requested on something else, we will have to reread all results, and that may be longer. =cut sub NZanalyse { my ( $string, $server ) = @_; # warn "---------" if $DEBUG; warn " NZanalyse" if $DEBUG; # warn "---------" if $DEBUG; # $server contains biblioserver or authorities, depending on what we search on. #warn "querying : $string on $server"; $server = 'biblioserver' unless $server; # if we have a ", replace the content to discard temporarily any and/or/not inside my $commacontent; if ( $string =~ /"/ ) { $string =~ s/"(.*?)"/__X__/; $commacontent = $1; warn "commacontent : $commacontent" if $DEBUG; } # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y" # then, call again NZanalyse with $left and $right # (recursive until we find a leaf (=> something without and/or/not) # delete repeated operator... Would then go in infinite loop while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) { } #process parenthesis before. if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) { my $left = $1; my $right = $4; my $operator = lc($3); # FIXME: and/or/not are operators, not operands warn "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right" if $DEBUG; my $leftresult = NZanalyse( $left, $server ); if ($operator) { my $rightresult = NZanalyse( $right, $server ); # OK, we have the results for right and left part of the query # depending of operand, intersect, union or exclude both lists # to get a result list if ( $operator eq ' and ' ) { return NZoperatorAND($leftresult,$rightresult); } elsif ( $operator eq ' or ' ) { # just merge the 2 strings return $leftresult . $rightresult; } elsif ( $operator eq ' not ' ) { return NZoperatorNOT($leftresult,$rightresult); } } else { # this error is impossible, because of the regexp that isolate the operand, but just in case... return $leftresult; } } warn "string :" . $string if $DEBUG; my $left = ""; my $right = ""; my $operator = ""; if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) { $left = $1; $right = $3; $operator = lc($2); # FIXME: and/or/not are operators, not operands } warn "no parenthesis. left : $left operator: $operator right: $right" if $DEBUG; # it's not a leaf, we have a and/or/not if ($operator) { # reintroduce comma content if needed $right =~ s/__X__/"$commacontent"/ if $commacontent; $left =~ s/__X__/"$commacontent"/ if $commacontent; warn "node : $left / $operator / $right\n" if $DEBUG; my $leftresult = NZanalyse( $left, $server ); my $rightresult = NZanalyse( $right, $server ); warn " leftresult : $leftresult" if $DEBUG; warn " rightresult : $rightresult" if $DEBUG; # OK, we have the results for right and left part of the query # depending of operand, intersect, union or exclude both lists # to get a result list if ( $operator eq ' and ' ) { warn "NZAND"; return NZoperatorAND($leftresult,$rightresult); } elsif ( $operator eq ' or ' ) { # just merge the 2 strings return $leftresult . $rightresult; } elsif ( $operator eq ' not ' ) { return NZoperatorNOT($leftresult,$rightresult); } else { # this error is impossible, because of the regexp that isolate the operand, but just in case... die "error : operand unknown : $operator for $string"; } # it's a leaf, do the real SQL query and return the result } else { $string =~ s/__X__/"$commacontent"/ if $commacontent; $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g; #remove trailing blank at the beginning $string =~ s/^ //g; warn "leaf:$string" if $DEBUG; # parse the string in in operator/operand/value again my $left = ""; my $operator = ""; my $right = ""; if ($string =~ /(.*)(>=|<=)(.*)/) { $left = $1; $operator = $2; $right = $3; } else { $left = $string; } # warn "handling leaf... left:$left operator:$operator right:$right" # if $DEBUG; unless ($operator) { if ($string =~ /(.*)(>|<|=)(.*)/) { $left = $1; $operator = $2; $right = $3; warn "handling unless (operator)... left:$left operator:$operator right:$right" if $DEBUG; } else { $left = $string; } } my $results; # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr... $left =~ s/ .*$//; # automatic replace for short operators $left = 'title' if $left =~ '^ti$'; $left = 'author' if $left =~ '^au$'; $left = 'publisher' if $left =~ '^pb$'; $left = 'subject' if $left =~ '^su$'; $left = 'koha-Auth-Number' if $left =~ '^an$'; $left = 'keyword' if $left =~ '^kw$'; warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG; if ( $operator && $left ne 'keyword' ) { #do a specific search my $dbh = C4::Context->dbh; $operator = 'LIKE' if $operator eq '=' and $right =~ /%/; my $sth = $dbh->prepare( "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?" ); warn "$left / $operator / $right\n" if $DEBUG; # split each word, query the DB and build the biblionumbers result #sanitizing leftpart $left =~ s/^\s+|\s+$//; foreach ( split / /, $right ) { my $biblionumbers; $_ =~ s/^\s+|\s+$//; next unless $_; warn "EXECUTE : $server, $left, $_" if $DEBUG; $sth->execute( $server, $left, $_ ) or warn "execute failed: $!"; while ( my ( $line, $value ) = $sth->fetchrow ) { # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <) # otherwise, fill the result $biblionumbers .= $line unless ( $right =~ /^\d+$/ && $value =~ /\D/ ); warn "result : $value " . ( $right =~ /\d/ ) . "==" . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line"; } # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { warn "NZAND" if $DEBUG; $results = NZoperatorAND($biblionumbers,$results); } else { $results = $biblionumbers; } } } else { #do a complete search (all indexes), if index='kw' do complete search too. my $dbh = C4::Context->dbh; my $sth = $dbh->prepare( "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?" ); # split each word, query the DB and build the biblionumbers result foreach ( split / /, $string ) { next if C4::Context->stopwords->{ uc($_) }; # skip if stopword warn "search on all indexes on $_" if $DEBUG; my $biblionumbers; next unless $_; $sth->execute( $server, $_ ); while ( my $line = $sth->fetchrow ) { $biblionumbers .= $line; } # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { $results = NZoperatorAND($biblionumbers,$results); } else { warn "NEW RES for $_ = $biblionumbers" if $DEBUG; $results = $biblionumbers; } } } warn "return : $results for LEAF : $string" if $DEBUG; return $results; } warn "---------\nLeave NZanalyse\n---------" if $DEBUG; } sub NZoperatorAND{ my ($rightresult, $leftresult)=@_; my @leftresult = split /;/, $leftresult; warn " @leftresult / $rightresult \n" if $DEBUG; # my @rightresult = split /;/,$leftresult; my $finalresult; # parse the left results, and if the biblionumber exist in the right result, save it in finalresult # the result is stored twice, to have the same weight for AND than OR. # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64 foreach (@leftresult) { my $value = $_; my $countvalue; ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/); if ( $rightresult =~ /\Q$value\E-(\d+);/ ) { $countvalue = ( $1 > $countvalue ? $countvalue : $1 ); $finalresult .= "$value-$countvalue;$value-$countvalue;"; } } warn "NZAND DONE : $finalresult \n" if $DEBUG; return $finalresult; } sub NZoperatorOR{ my ($rightresult, $leftresult)=@_; return $rightresult.$leftresult; } sub NZoperatorNOT{ my ($leftresult, $rightresult)=@_; my @leftresult = split /;/, $leftresult; # my @rightresult = split /;/,$leftresult; my $finalresult; foreach (@leftresult) { my $value=$_; $value=$1 if $value=~m/(.*)-\d+$/; unless ($rightresult =~ "$value-") { $finalresult .= "$_;"; } } return $finalresult; } =head2 NZorder $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset); TODO :: Description =cut sub NZorder { my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_; warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG; # order title asc by default # $ordering = '1=36 dbh; # # order by POPULARITY # if ( $ordering =~ /popularity/ ) { my %result; my %popularity; # popularity is not in MARC record, it's builded from a specific query my $sth = $dbh->prepare("select sum(issues) from items where biblionumber=?"); foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; $result{$biblionumber} = GetMarcBiblio($biblionumber); $sth->execute($biblionumber); my $popularity = $sth->fetchrow || 0; # hint : the key is popularity.title because we can have # many results with the same popularity. In this cas, sub-ordering is done by title # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity # (un-frequent, I agree, but we won't forget anything that way ;-) $popularity{ sprintf( "%10d", $popularity ) . $title . $biblionumber } = $biblionumber; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{ $popularity{$key} }->as_usmarc(); } } else { # sort popularity ASC foreach my $key ( sort ( keys %popularity ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{ $popularity{$key} }->as_usmarc(); } } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; # # ORDER BY author # } elsif ( $ordering =~ /author/ ) { my %result; foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; my $record = GetMarcBiblio($biblionumber); my $author; if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) { $author = $record->subfield( '200', 'f' ); $author = $record->subfield( '700', 'a' ) unless $author; } else { $author = $record->subfield( '100', 'a' ); } # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title # and we don't want to get only 1 result for each of them !!! $result{ $author . $biblionumber } = $record; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; if ( $ordering eq 'author_za' ) { # sort by author desc foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } else { # sort by author ASC foreach my $key ( sort ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; # # ORDER BY callnumber # } elsif ( $ordering =~ /callnumber/ ) { my %result; foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; my $record = GetMarcBiblio($biblionumber); my $callnumber; my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField( $dbh, 'items.itemcallnumber' ); ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber') unless $callnumber_tag; if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) { $callnumber = $record->subfield( '200', 'f' ); } else { $callnumber = $record->subfield( '100', 'a' ); } # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title # and we don't want to get only 1 result for each of them !!! $result{ $callnumber . $biblionumber } = $record; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; if ( $ordering eq 'call_number_dsc' ) { # sort by title desc foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } else { # sort by title ASC foreach my $key ( sort { $a cmp $b } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; } elsif ( $ordering =~ /pubdate/ ) { #pub year my %result; foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; my $record = GetMarcBiblio($biblionumber); my ( $publicationyear_tag, $publicationyear_subfield ) = GetMarcFromKohaField( 'biblioitems.publicationyear', '' ); my $publicationyear = $record->subfield( $publicationyear_tag, $publicationyear_subfield ); # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title # and we don't want to get only 1 result for each of them !!! $result{ $publicationyear . $biblionumber } = $record; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } else { # sort by pub year ASC foreach my $key ( sort ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}->as_usmarc(); } } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; # # ORDER BY title # } elsif ( $ordering =~ /title/ ) { # the title is in the biblionumbers string, so we just need to build a hash, sort it and return my %result; foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title # and we don't want to get only 1 result for each of them !!! # hint & speed improvement : we can order without reading the record # so order, and read records only for the requested page ! $result{ $title . $biblionumber } = $biblionumber; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; if ( $ordering eq 'title_az' ) { # sort by title desc foreach my $key ( sort ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; } } else { # sort by title ASC foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; } } # limit the $results_per_page to result size if it's more $results_per_page = $numbers - 1 if $numbers < $results_per_page; # for the requested page, replace biblionumber by the complete record # speed improvement : avoid reading too much things for ( my $counter = $offset ; $counter <= $offset + $results_per_page ; $counter++ ) { $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc; } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; } else { # # order by ranking # # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking my %result; my %count_ranking; foreach ( split /;/, $biblionumbers ) { my ( $biblionumber, $title ) = split /,/, $_; $title =~ /(.*)-(\d)/; # get weight my $ranking = $2; # note that we + the ranking because ranking is calculated on weight of EACH term requested. # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N # biblio N has ranking = 6 $count_ranking{$biblionumber} += $ranking; } # build the result by "inverting" the count_ranking hash # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead # warn "counting"; foreach ( keys %count_ranking ) { $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_; } # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; my $numbers = 0; foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; } # limit the $results_per_page to result size if it's more $results_per_page = $numbers - 1 if $numbers < $results_per_page; # for the requested page, replace biblionumber by the complete record # speed improvement : avoid reading too much things for ( my $counter = $offset ; $counter <= $offset + $results_per_page ; $counter++ ) { $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc if $result_hash->{'RECORDS'}[$counter]; } my $finalresult = (); $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; } } =head2 ModBiblios ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test); this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios test parameter if set donot perform change to records in database. =over 2 =item C * $listbiblios is an array ref to marcrecords to be changed * $tagsubfield is the reference of the subfield to change. * $initvalue is the value to search the record for * $targetvalue is the value to set the subfield to * $test is to be set only not to perform changes in database. =item C * $countchanged counts all the changes performed. * $listunchanged contains the list of all the biblionumbers of records unchanged. =item C =back my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);; #If one wants to display unchanged records, you should get biblios foreach @$listunchanged $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged); =cut sub ModBiblios { my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_; my $countmatched; my @unmatched; my ( $tag, $subfield ) = ( $1, $2 ) if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ ); if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) { $tag = $tag . $subfield; undef $subfield; } my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber'); my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber'); if ($tag eq $itemtag) { # do not allow the embedded item tag to be # edited from here warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed"; return (0, []); } foreach my $usmarc (@$listbiblios) { my $record; $record = eval { MARC::Record->new_from_usmarc($usmarc) }; my $biblionumber; if ($@) { # usmarc is not a valid usmarc May be a biblionumber # FIXME - sorry, please let's figure out whether # this function is to be passed a list of # record numbers or a list of MARC::Record # objects. The former is probably better # because the MARC records supplied by Zebra # may be not current. $record = GetMarcBiblio($usmarc); $biblionumber = $usmarc; } else { if ( $bntag >= 010 ) { $biblionumber = $record->subfield( $bntag, $bnsubf ); } else { $biblionumber = $record->field($bntag)->data; } } #GetBiblionumber is to be written. #Could be replaced by TransformMarcToKoha (But Would be longer) if ( $record->field($tag) ) { my $modify = 0; foreach my $field ( $record->field($tag) ) { if ($subfield) { if ( $field->delete_subfield( 'code' => $subfield, 'match' => qr($initvalue) ) ) { $countmatched++; $modify = 1; $field->update( $subfield, $targetvalue ) if ($targetvalue); } } else { if ( $tag >= 010 ) { if ( $field->delete_field($field) ) { $countmatched++; $modify = 1; } } else { $field->data = $targetvalue if ( $field->data =~ qr($initvalue) ); } } } # warn $record->as_formatted; if ($modify) { ModBiblio( $record, $biblionumber, GetFrameworkCode($biblionumber) ) unless ($test); } else { push @unmatched, $biblionumber; } } else { push @unmatched, $biblionumber; } } return ( $countmatched, \@unmatched ); } END { } # module clean-up code here (global destructor) 1; __END__ =head1 AUTHOR Koha Developement team =cut