C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Dates qw(format_date);
  25
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  27
  28 # set the version for version checking
  29 BEGIN {
  30     $VERSION = 3.01;
  31     $DEBUG = ( $ENV{DEBUG} ) ? 1 : 0;
  32 }
  33
  34 =head1 NAME
  35
  36 C4::Search - Functions for searching the Koha catalog.
  37
  38 =head1 SYNOPSIS
  39
  40 See opac/opac-search.pl or catalogue/search.pl for example of usage
  41
  42 =head1 DESCRIPTION
  43
  44 This module provides searching functions for Koha's bibliographic databases
  45
  46 =head1 FUNCTIONS
  47
  48 =cut
  49
  50 @ISA    = qw(Exporter);
  51 @EXPORT = qw(
  52   &findseealso
  53   &FindDuplicate
  54   &SimpleSearch
  55   &searchResults
  56   &getRecords
  57   &buildQuery
  58   &NZgetRecords
  59   &ModBiblios
  60 );
  61
  62 # make all your functions, whether exported or not;
  63
  64 =head2 findseealso($dbh,$fields);
  65
  66 C<$dbh> is a link to the DB handler.
  67
  68 use C4::Context;
  69 my $dbh =C4::Context->dbh;
  70
  71 C<$fields> is a reference to the fields array
  72
  73 This function modifies the @$fields array and adds related fields to search on.
  74
  75 FIXME: this function is probably deprecated in Koha 3
  76
  77 =cut
  78
  79 sub findseealso {
  80     my ( $dbh, $fields ) = @_;
  81     my $tagslib = GetMarcStructure(1);
  82     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  83         my ($tag)      = substr( @$fields[$i], 1, 3 );
  84         my ($subfield) = substr( @$fields[$i], 4, 1 );
  85         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  86           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  87     }
  88 }
  89
  90 =head2 FindDuplicate
  91
  92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  93
  94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  95
  96 =cut
  97
  98 sub FindDuplicate {
  99     my ($record) = @_;
 100     my $dbh = C4::Context->dbh;
 101     my $result = TransformMarcToKoha( $dbh, $record, '' );
 102     my $sth;
 103     my $query;
 104     my $search;
 105     my $type;
 106     my ( $biblionumber, $title );
 107
 108     # search duplicate on ISBN, easy and fast..
 109     # ... normalize first
 110     if ( $result->{isbn} ) {
 111         $result->{isbn} =~ s/\(.*$//;
 112         $result->{isbn} =~ s/\s+$//;
 113         $query = "isbn=$result->{isbn}";
 114     }
 115     else {
 116         $result->{title} =~ s /\\//g;
 117         $result->{title} =~ s /\"//g;
 118         $result->{title} =~ s /\(//g;
 119         $result->{title} =~ s /\)//g;
 120
 121         # FIXME: instead of removing operators, could just do
 122         # quotes around the value
 123         $result->{title} =~ s/(and|or|not)//g;
 124         $query = "ti,ext=$result->{title}";
 125         $query .= " and itemtype=$result->{itemtype}"
 126           if ( $result->{itemtype} );
 127         if   ( $result->{author} ) {
 128             $result->{author} =~ s /\\//g;
 129             $result->{author} =~ s /\"//g;
 130             $result->{author} =~ s /\(//g;
 131             $result->{author} =~ s /\)//g;
 132
 133             # remove valid operators
 134             $result->{author} =~ s/(and|or|not)//g;
 135             $query .= " and au,ext=$result->{author}";
 136         }
 137     }
 138
 139     # FIXME: add error handling
 140     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 141     my @results;
 142     foreach my $possible_duplicate_record (@$searchresults) {
 143         my $marcrecord =
 144           MARC::Record->new_from_usmarc($possible_duplicate_record);
 145         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 146
 147         # FIXME :: why 2 $biblionumber ?
 148         if ($result) {
 149             push @results, $result->{'biblionumber'};
 150             push @results, $result->{'title'};
 151         }
 152     }
 153     return @results;
 154 }
 155
 156 =head2 SimpleSearch
 157
 158 ($error,$results) = SimpleSearch($query,@servers);
 159
 160 This function provides a simple search API on the bibliographic catalog
 161
 162 =over 2
 163
 164 =item C<input arg:>
 165
 166     * $query can be a simple keyword or a complete CCL query
 167     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 168
 169 =item C<Output arg:>
 170     * $error is a empty unless an error is detected
 171     * \@results is an array of records.
 172
 173 =item C<usage in the script:>
 174
 175 =back
 176
 177 my ($error, $marcresults) = SimpleSearch($query);
 178
 179 if (defined $error) {
 180     $template->param(query_error => $error);
 181     warn "error: ".$error;
 182     output_html_with_http_headers $input, $cookie, $template->output;
 183     exit;
 184 }
 185
 186 my $hits = scalar @$marcresults;
 187 my @results;
 188
 189 for(my $i=0;$i<$hits;$i++) {
 190     my %resultsloop;
 191     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 192     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 193
 194     #build the hash for the template.
 195     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 196     $resultsloop{title}           = $biblio->{'title'};
 197     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 198     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 199     $resultsloop{author}          = $biblio->{'author'};
 200     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 201     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 202
 203     push @results, \%resultsloop;
 204 }
 205
 206 $template->param(result=>\@results);
 207
 208 =cut
 209
 210 sub SimpleSearch {
 211     my $query = shift;
 212     if ( C4::Context->preference('NoZebra') ) {
 213         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 214         my $search_result =
 215           (      $result->{hits}
 216               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 217         return ( undef, $search_result );
 218     }
 219     else {
 220         my @servers = @_;
 221         my @results;
 222         my @tmpresults;
 223         my @zconns;
 224         return ( "No query entered", undef ) unless $query;
 225
 226         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 227         @servers = ("biblioserver") unless @servers;
 228
 229         # Initialize & Search Zebra
 230         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 231             eval {
 232                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 233                 $tmpresults[$i] =
 234                   $zconns[$i]
 235                   ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
 236
 237                 # error handling
 238                 my $error =
 239                     $zconns[$i]->errmsg() . " ("
 240                   . $zconns[$i]->errcode() . ") "
 241                   . $zconns[$i]->addinfo() . " "
 242                   . $zconns[$i]->diagset();
 243
 244                 return ( $error, undef ) if $zconns[$i]->errcode();
 245             };
 246             if ($@) {
 247
 248                 # caught a ZOOM::Exception
 249                 my $error =
 250                     $@->message() . " ("
 251                   . $@->code() . ") "
 252                   . $@->addinfo() . " "
 253                   . $@->diagset();
 254                 warn $error;
 255                 return ( $error, undef );
 256             }
 257         }
 258         my $hits;
 259         my $ev;
 260         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 261             $ev = $zconns[ $i - 1 ]->last_event();
 262             if ( $ev == ZOOM::Event::ZEND ) {
 263                 $hits = $tmpresults[ $i - 1 ]->size();
 264             }
 265             if ( $hits > 0 ) {
 266                 for ( my $j = 0 ; $j < $hits ; $j++ ) {
 267                     my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
 268                     push @results, $record;
 269                 }
 270             }
 271         }
 272         return ( undef, \@results );
 273     }
 274 }
 275
 276 =head2 getRecords
 277
 278 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 279
 280         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 281         $results_per_page, $offset,       $expanded_facet, $branches,
 282         $query_type,       $scan
 283     );
 284
 285 The all singing, all dancing, multi-server, asynchronous, scanning,
 286 searching, record nabbing, facet-building
 287
 288 See verbse embedded documentation.
 289
 290 =cut
 291
 292 sub getRecords {
 293     my (
 294         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 295         $results_per_page, $offset,       $expanded_facet, $branches,
 296         $query_type,       $scan
 297     ) = @_;
 298
 299     my @servers = @$servers_ref;
 300     my @sort_by = @$sort_by_ref;
 301
 302     # Initialize variables for the ZOOM connection and results object
 303     my $zconn;
 304     my @zconns;
 305     my @results;
 306     my $results_hashref = ();
 307
 308     # Initialize variables for the faceted results objects
 309     my $facets_counter = ();
 310     my $facets_info    = ();
 311     my $facets         = getFacets();
 312
 313     my @facets_loop
 314       ;    # stores the ref to array of hashes for template facets loop
 315
 316     ### LOOP THROUGH THE SERVERS
 317     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 318         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 319
 320 # perform the search, create the results objects
 321 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 322         my $query_to_use;
 323         if ( $servers[$i] =~ /biblioserver/ ) {
 324             $query_to_use = $koha_query;
 325         }
 326         else {
 327             $query_to_use = $simple_query;
 328         }
 329
 330         #$query_to_use = $simple_query if $scan;
 331         warn $simple_query if ( $scan and $DEBUG );
 332
 333         # Check if we've got a query_type defined, if so, use it
 334         eval {
 335             if ($query_type)
 336             {
 337                 if ( $query_type =~ /^ccl/ ) {
 338                     $query_to_use =~
 339                       s/\:/\=/g;    # change : to = last minute (FIXME)
 340                     $results[$i] =
 341                       $zconns[$i]->search(
 342                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 343                       );
 344                 }
 345                 elsif ( $query_type =~ /^cql/ ) {
 346                     $results[$i] =
 347                       $zconns[$i]->search(
 348                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 349                 }
 350                 elsif ( $query_type =~ /^pqf/ ) {
 351                     $results[$i] =
 352                       $zconns[$i]->search(
 353                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 354                 }
 355             }
 356             else {
 357                 if ($scan) {
 358                     $results[$i] =
 359                       $zconns[$i]->scan(
 360                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 361                       );
 362                 }
 363                 else {
 364                     $results[$i] =
 365                       $zconns[$i]->search(
 366                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 367                       );
 368                 }
 369             }
 370         };
 371         if ($@) {
 372             warn "WARNING: query problem with $query_to_use " . $@;
 373         }
 374
 375         # Concatenate the sort_by limits and pass them to the results object
 376         # Note: sort will override rank
 377         my $sort_by;
 378         foreach my $sort (@sort_by) {
 379             if ( $sort eq "author_az" ) {
 380                 $sort_by .= "1=1003 <i ";
 381             }
 382             elsif ( $sort eq "author_za" ) {
 383                 $sort_by .= "1=1003 >i ";
 384             }
 385             elsif ( $sort eq "popularity_asc" ) {
 386                 $sort_by .= "1=9003 <i ";
 387             }
 388             elsif ( $sort eq "popularity_dsc" ) {
 389                 $sort_by .= "1=9003 >i ";
 390             }
 391             elsif ( $sort eq "call_number_asc" ) {
 392                 $sort_by .= "1=20  <i ";
 393             }
 394             elsif ( $sort eq "call_number_dsc" ) {
 395                 $sort_by .= "1=20 >i ";
 396             }
 397             elsif ( $sort eq "pubdate_asc" ) {
 398                 $sort_by .= "1=31 <i ";
 399             }
 400             elsif ( $sort eq "pubdate_dsc" ) {
 401                 $sort_by .= "1=31 >i ";
 402             }
 403             elsif ( $sort eq "acqdate_asc" ) {
 404                 $sort_by .= "1=32 <i ";
 405             }
 406             elsif ( $sort eq "acqdate_dsc" ) {
 407                 $sort_by .= "1=32 >i ";
 408             }
 409             elsif ( $sort eq "title_az" ) {
 410                 $sort_by .= "1=4 <i ";
 411             }
 412             elsif ( $sort eq "title_za" ) {
 413                 $sort_by .= "1=4 >i ";
 414             }
 415         }
 416         if ($sort_by) {
 417             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 418                 warn "WARNING sort $sort_by failed";
 419             }
 420         }
 421     }    # finished looping through servers
 422
 423     # The big moment: asynchronously retrieve results from all servers
 424     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 425         my $ev = $zconns[ $i - 1 ]->last_event();
 426         if ( $ev == ZOOM::Event::ZEND ) {
 427             next unless $results[ $i - 1 ];
 428             my $size = $results[ $i - 1 ]->size();
 429             if ( $size > 0 ) {
 430                 my $results_hash;
 431
 432                 # loop through the results
 433                 $results_hash->{'hits'} = $size;
 434                 my $times;
 435                 if ( $offset + $results_per_page <= $size ) {
 436                     $times = $offset + $results_per_page;
 437                 }
 438                 else {
 439                     $times = $size;
 440                 }
 441                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 442                     my $records_hash;
 443                     my $record;
 444                     my $facet_record;
 445
 446                     ## Check if it's an index scan
 447                     if ($scan) {
 448                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 449
 450                  # here we create a minimal MARC record and hand it off to the
 451                  # template just like a normal result ... perhaps not ideal, but
 452                  # it works for now
 453                         my $tmprecord = MARC::Record->new();
 454                         $tmprecord->encoding('UTF-8');
 455                         my $tmptitle;
 456                         my $tmpauthor;
 457
 458                 # the minimal record in author/title (depending on MARC flavour)
 459                         if ( C4::Context->preference("marcflavour") eq
 460                             "UNIMARC" )
 461                         {
 462                             $tmptitle = MARC::Field->new(
 463                                 '200', ' ', ' ',
 464                                 a => $term,
 465                                 f => $occ
 466                             );
 467                         }
 468                         else {
 469                             $tmptitle =
 470                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 471                             $tmpauthor =
 472                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 473                         }
 474                         $tmprecord->append_fields($tmptitle);
 475                         $tmprecord->append_fields($tmpauthor);
 476                         $results_hash->{'RECORDS'}[$j] =
 477                           $tmprecord->as_usmarc();
 478                     }
 479
 480                     # not an index scan
 481                     else {
 482                         $record = $results[ $i - 1 ]->record($j)->raw();
 483
 484                         # warn "RECORD $j:".$record;
 485                         $results_hash->{'RECORDS'}[$j] = $record;
 486
 487             # Fill the facets while we're looping, but only for the biblioserver
 488                         $facet_record = MARC::Record->new_from_usmarc($record)
 489                           if $servers[ $i - 1 ] =~ /biblioserver/;
 490
 491                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 492                         if ($facet_record) {
 493                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 494
 495                                 if ( $facets->[$k] ) {
 496                                     my @fields;
 497                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 498                                     {
 499                                         push @fields,
 500                                           $facet_record->field($tag);
 501                                     }
 502                                     for my $field (@fields) {
 503                                         my @subfields = $field->subfields();
 504                                         for my $subfield (@subfields) {
 505                                             my ( $code, $data ) = @$subfield;
 506                                             if ( $code eq
 507                                                 $facets->[$k]->{'subfield'} )
 508                                             {
 509                                                 $facets_counter->{ $facets->[$k]
 510                                                       ->{'link_value'} }
 511                                                   ->{$data}++;
 512                                             }
 513                                         }
 514                                     }
 515                                     $facets_info->{ $facets->[$k]
 516                                           ->{'link_value'} }->{'label_value'} =
 517                                       $facets->[$k]->{'label_value'};
 518                                     $facets_info->{ $facets->[$k]
 519                                           ->{'link_value'} }->{'expanded'} =
 520                                       $facets->[$k]->{'expanded'};
 521                                 }
 522                             }
 523                         }
 524                     }
 525                 }
 526                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 527             }
 528
 529             # warn "connection ", $i-1, ": $size hits";
 530             # warn $results[$i-1]->record(0)->render() if $size > 0;
 531
 532             # BUILD FACETS
 533             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 534                 for my $link_value (
 535                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 536                     keys %$facets_counter )
 537                 {
 538                     my $expandable;
 539                     my $number_of_facets;
 540                     my @this_facets_array;
 541                     for my $one_facet (
 542                         sort {
 543                             $facets_counter->{$link_value}
 544                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 545                         } keys %{ $facets_counter->{$link_value} }
 546                       )
 547                     {
 548                         $number_of_facets++;
 549                         if (   ( $number_of_facets < 6 )
 550                             || ( $expanded_facet eq $link_value )
 551                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 552                         {
 553
 554                       # Sanitize the link value ), ( will cause errors with CCL,
 555                             my $facet_link_value = $one_facet;
 556                             $facet_link_value =~ s/(\(|\))/ /g;
 557
 558                             # fix the length that will display in the label,
 559                             my $facet_label_value = $one_facet;
 560                             $facet_label_value =
 561                               substr( $one_facet, 0, 20 ) . "..."
 562                               unless length($facet_label_value) <= 20;
 563
 564                             # if it's a branch, label by the name, not the code,
 565                             if ( $link_value =~ /branch/ ) {
 566                                 $facet_label_value =
 567                                   $branches->{$one_facet}->{'branchname'};
 568                             }
 569
 570                 # but we're down with the whole label being in the link's title.
 571                             my $facet_title_value = $one_facet;
 572
 573                             push @this_facets_array,
 574                               (
 575                                 {
 576                                     facet_count =>
 577                                       $facets_counter->{$link_value}
 578                                       ->{$one_facet},
 579                                     facet_label_value => $facet_label_value,
 580                                     facet_title_value => $facet_title_value,
 581                                     facet_link_value  => $facet_link_value,
 582                                     type_link_value   => $link_value,
 583                                 },
 584                               );
 585                         }
 586                     }
 587
 588                     # handle expanded option
 589                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 590                         $expandable = 1
 591                           if ( ( $number_of_facets > 6 )
 592                             && ( $expanded_facet ne $link_value ) );
 593                     }
 594                     push @facets_loop,
 595                       (
 596                         {
 597                             type_link_value => $link_value,
 598                             type_id         => $link_value . "_id",
 599                             type_label =>
 600                               $facets_info->{$link_value}->{'label_value'},
 601                             facets     => \@this_facets_array,
 602                             expandable => $expandable,
 603                             expand     => $link_value,
 604                         }
 605                       );
 606                 }
 607             }
 608         }
 609     }
 610     return ( undef, $results_hashref, \@facets_loop );
 611 }
 612
 613 # STOPWORDS
 614 sub _remove_stopwords {
 615     my ( $operand, $index ) = @_;
 616     my @stopwords_removed;
 617
 618     # phrase and exact-qualified indexes shouldn't have stopwords removed
 619     if ( $index !~ m/phr|ext/ ) {
 620
 621 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 622 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 623 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 624 #       is a stopword, we'd get "çon" and wouldn't find anything...
 625         foreach ( keys %{ C4::Context->stopwords } ) {
 626             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 627             if ( $operand =~
 628                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
 629             {
 630                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 631                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 632                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 633                 push @stopwords_removed, $_;
 634             }
 635         }
 636     }
 637     return ( $operand, \@stopwords_removed );
 638 }
 639
 640 # TRUNCATION
 641 sub _detect_truncation {
 642     my ( $operand, $index ) = @_;
 643     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 644         @regexpr );
 645     $operand =~ s/^ //g;
 646     my @wordlist = split( /\s/, $operand );
 647     foreach my $word (@wordlist) {
 648         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 649             push @rightlefttruncated, $word;
 650         }
 651         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 652             push @lefttruncated, $word;
 653         }
 654         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 655             push @righttruncated, $word;
 656         }
 657         elsif ( index( $word, "*" ) < 0 ) {
 658             push @nontruncated, $word;
 659         }
 660         else {
 661             push @regexpr, $word;
 662         }
 663     }
 664     return (
 665         \@nontruncated,       \@righttruncated, \@lefttruncated,
 666         \@rightlefttruncated, \@regexpr
 667     );
 668 }
 669
 670 # STEMMING
 671 sub _build_stemmed_operand {
 672     my ($operand) = @_;
 673     my $stemmed_operand;
 674
 675 # FIXME: the locale should be set based on the user's language and/or search choice
 676     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 677
 678 # FIXME: these should be stored in the db so the librarian can modify the behavior
 679     $stemmer->add_exceptions(
 680         {
 681             'and' => 'and',
 682             'or'  => 'or',
 683             'not' => 'not',
 684         }
 685     );
 686     my @words = split( / /, $operand );
 687     my $stems = $stemmer->stem(@words);
 688     for my $stem (@$stems) {
 689         $stemmed_operand .= "$stem";
 690         $stemmed_operand .= "?"
 691           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 692         $stemmed_operand .= " ";
 693     }
 694     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 695     return $stemmed_operand;
 696 }
 697
 698 # FIELD WEIGHTING
 699 sub _build_weighted_query {
 700
 701 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 702 # pretty well but could work much better if we had a smarter query parser
 703     my ( $operand, $stemmed_operand, $index ) = @_;
 704     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 705     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 706     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 707
 708     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 709
 710     # Keyword, or, no index specified
 711     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 712         $weighted_query .=
 713           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 714         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 715         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 716           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 717           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 718         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 719           if $fuzzy_enabled;    # add fuzzy, word list
 720         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 721           if ( $stemming and $stemmed_operand )
 722           ;                     # add stemming, right truncation
 723         $weighted_query .= " or wrdl,r9=\"$operand\"";
 724
 725         # embedded sorting: 0 a-z; 1 z-a
 726         # $weighted_query .= ") or (sort1,aut=1";
 727     }
 728
 729     # Barcode searches should skip this process
 730     elsif ( $index eq 'bc' ) {
 731         $weighted_query .= "bc=\"$operand\"";
 732     }
 733
 734     # Authority-number searches should skip this process
 735     elsif ( $index eq 'an' ) {
 736         $weighted_query .= "an=\"$operand\"";
 737     }
 738
 739     # If the index already has more than one qualifier, wrap the operand
 740     # in quotes and pass it back (assumption is that the user knows what they
 741     # are doing and won't appreciate us mucking up their query
 742     elsif ( $index =~ ',' ) {
 743         $weighted_query .= " $index=\"$operand\"";
 744     }
 745
 746     #TODO: build better cases based on specific search indexes
 747     else {
 748         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 749           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 750         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 751         $weighted_query .=
 752           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 753     }
 754
 755     $weighted_query .= "))";                       # close rank specification
 756     return $weighted_query;
 757 }
 758
 759 =head2 buildQuery
 760
 761 ( $error, $query,
 762 $simple_query, $query_cgi,
 763 $query_desc, $limit,
 764 $limit_cgi, $limit_desc,
 765 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 766
 767 Build queries and limits in CCL, CGI, Human,
 768 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 769
 770 See verbose embedded documentation.
 771
 772
 773 =cut
 774
 775 sub buildQuery {
 776     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 777
 778     warn "---------"        if $DEBUG;
 779     warn "Enter buildQuery" if $DEBUG;
 780     warn "---------"        if $DEBUG;
 781
 782     # dereference
 783     my @operators = @$operators if $operators;
 784     my @indexes   = @$indexes   if $indexes;
 785     my @operands  = @$operands  if $operands;
 786     my @limits    = @$limits    if $limits;
 787     my @sort_by   = @$sort_by   if $sort_by;
 788
 789     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 790     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 791     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 792     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 793     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 794
 795     # no stemming/weight/fuzzy in NoZebra
 796     if ( C4::Context->preference("NoZebra") ) {
 797         $stemming      = 0;
 798         $weight_fields = 0;
 799         $fuzzy_enabled = 0;
 800     }
 801
 802     my $query        = $operands[0];
 803     my $simple_query = $operands[0];
 804
 805     # initialize the variables we're passing back
 806     my $query_cgi;
 807     my $query_desc;
 808     my $query_type;
 809
 810     my $limit;
 811     my $limit_cgi;
 812     my $limit_desc;
 813
 814     my $stopwords_removed;    # flag to determine if stopwords have been removed
 815
 816 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 817 # DIAGNOSTIC ONLY!!
 818     if ( $query =~ /^ccl=/ ) {
 819         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 820     }
 821     if ( $query =~ /^cql=/ ) {
 822         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 823     }
 824     if ( $query =~ /^pqf=/ ) {
 825         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 826     }
 827
 828     # pass nested queries directly
 829     # FIXME: need better handling of some of these variables in this case
 830     if ( $query =~ /(\(|\))/ ) {
 831         return (
 832             undef,              $query, $simple_query, $query_cgi,
 833             $query,             $limit, $limit_cgi,    $limit_desc,
 834             $stopwords_removed, 'ccl'
 835         );
 836     }
 837
 838 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 839 # query operands and indexes and add stemming, truncation, field weighting, etc.
 840 # Once we do so, we'll end up with a value in $query, just like if we had an
 841 # incoming $query from the user
 842     else {
 843         $query = ""
 844           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 845         my $previous_operand
 846           ;    # a flag used to keep track if there was a previous query
 847                # if there was, we can apply the current operator
 848                # for every operand
 849         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 850
 851             # COMBINE OPERANDS, INDEXES AND OPERATORS
 852             if ( $operands[$i] ) {
 853
 854               # A flag to determine whether or not to add the index to the query
 855                 my $indexes_set;
 856
 857 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 858                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 859                     $weight_fields    = 0;
 860                     $stemming         = 0;
 861                     $remove_stopwords = 0;
 862                 }
 863                 my $operand = $operands[$i];
 864                 my $index   = $indexes[$i];
 865
 866                 # Add index-specific attributes
 867                 # Date of Publication
 868                 if ( $index eq 'yr' ) {
 869                     $index .= ",st-numeric";
 870                     $indexes_set++;
 871                     (
 872                         $stemming,      $auto_truncation,
 873                         $weight_fields, $fuzzy_enabled,
 874                         $remove_stopwords
 875                     ) = ( 0, 0, 0, 0, 0 );
 876                 }
 877
 878                 # Date of Acquisition
 879                 elsif ( $index eq 'acqdate' ) {
 880                     $index .= ",st-date-normalized";
 881                     $indexes_set++;
 882                     (
 883                         $stemming,      $auto_truncation,
 884                         $weight_fields, $fuzzy_enabled,
 885                         $remove_stopwords
 886                     ) = ( 0, 0, 0, 0, 0 );
 887                 }
 888
 889                 # Set default structure attribute (word list)
 890                 my $struct_attr;
 891                 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 892                     $struct_attr = ",wrdl";
 893                 }
 894
 895                 # Some helpful index variants
 896                 my $index_plus       = $index . $struct_attr . ":" if $index;
 897                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 898
 899                 # Remove Stopwords
 900                 if ($remove_stopwords) {
 901                     ( $operand, $stopwords_removed ) =
 902                       _remove_stopwords( $operand, $index );
 903                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 904                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 905                       if ( $stopwords_removed && $DEBUG );
 906                 }
 907
 908                 # Detect Truncation
 909                 my ( $nontruncated, $righttruncated, $lefttruncated,
 910                     $rightlefttruncated, $regexpr );
 911                 my $truncated_operand;
 912                 (
 913                     $nontruncated, $righttruncated, $lefttruncated,
 914                     $rightlefttruncated, $regexpr
 915                 ) = _detect_truncation( $operand, $index );
 916                 warn
 917 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 918                   if $DEBUG;
 919
 920                 # Apply Truncation
 921                 if (
 922                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 923                     scalar(@$rightlefttruncated) > 0 )
 924                 {
 925
 926                # Don't field weight or add the index to the query, we do it here
 927                     $indexes_set = 1;
 928                     undef $weight_fields;
 929                     my $previous_truncation_operand;
 930                     if ( scalar(@$nontruncated) > 0 ) {
 931                         $truncated_operand .= "$index_plus @$nontruncated ";
 932                         $previous_truncation_operand = 1;
 933                     }
 934                     if ( scalar(@$righttruncated) > 0 ) {
 935                         $truncated_operand .= "and "
 936                           if $previous_truncation_operand;
 937                         $truncated_operand .=
 938                           "$index_plus_comma" . "rtrn:@$righttruncated ";
 939                         $previous_truncation_operand = 1;
 940                     }
 941                     if ( scalar(@$lefttruncated) > 0 ) {
 942                         $truncated_operand .= "and "
 943                           if $previous_truncation_operand;
 944                         $truncated_operand .=
 945                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
 946                         $previous_truncation_operand = 1;
 947                     }
 948                     if ( scalar(@$rightlefttruncated) > 0 ) {
 949                         $truncated_operand .= "and "
 950                           if $previous_truncation_operand;
 951                         $truncated_operand .=
 952                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
 953                         $previous_truncation_operand = 1;
 954                     }
 955                 }
 956                 $operand = $truncated_operand if $truncated_operand;
 957                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
 958
 959                 # Handle Stemming
 960                 my $stemmed_operand;
 961                 $stemmed_operand = _build_stemmed_operand($operand)
 962                   if $stemming;
 963                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
 964
 965                 # Handle Field Weighting
 966                 my $weighted_operand;
 967                 $weighted_operand =
 968                   _build_weighted_query( $operand, $stemmed_operand, $index )
 969                   if $weight_fields;
 970                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
 971                 $operand = $weighted_operand if $weight_fields;
 972                 $indexes_set = 1 if $weight_fields;
 973
 974                 # If there's a previous operand, we need to add an operator
 975                 if ($previous_operand) {
 976
 977                     # User-specified operator
 978                     if ( $operators[ $i - 1 ] ) {
 979                         $query     .= " $operators[$i-1] ";
 980                         $query     .= " $index_plus " unless $indexes_set;
 981                         $query     .= " $operand";
 982                         $query_cgi .= "&op=$operators[$i-1]";
 983                         $query_cgi .= "&idx=$index" if $index;
 984                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
 985                         $query_desc .=
 986                           " $operators[$i-1] $index_plus $operands[$i]";
 987                     }
 988
 989                     # Default operator is and
 990                     else {
 991                         $query      .= " and ";
 992                         $query      .= "$index_plus " unless $indexes_set;
 993                         $query      .= "$operand";
 994                         $query_cgi  .= "&op=and&idx=$index" if $index;
 995                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
 996                         $query_desc .= " and $index_plus $operands[$i]";
 997                     }
 998                 }
 999
1000                 # There isn't a pervious operand, don't need an operator
1001                 else {
1002
1003                     # Field-weighted queries already have indexes set
1004                     $query .= " $index_plus " unless $indexes_set;
1005                     $query .= $operand;
1006                     $query_desc .= " $index_plus $operands[$i]";
1007                     $query_cgi  .= "&idx=$index" if $index;
1008                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1009                     $previous_operand = 1;
1010                 }
1011             }    #/if $operands
1012         }    # /for
1013     }
1014     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1015
1016     # add limits
1017     my $group_OR_limits;
1018     my $availability_limit;
1019     foreach my $this_limit (@limits) {
1020         if ( $this_limit =~ /available/ ) {
1021
1022 # available is defined as (items.notloan is NULL) and (items.itemlost > 0 or NULL) (last clause handles NULL values for lost in zebra)
1023 # all records not indexed in the onloan register and allrecords not indexed in the lost register, or where the value of lost is equal to or less than 0
1024             $availability_limit .=
1025 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and ((lost,st-numeric <= 0) or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1026             $limit_cgi  .= "&limit=available";
1027             $limit_desc .= "";
1028         }
1029
1030         # group_OR_limits, prefixed by mc-
1031         # OR every member of the group
1032         elsif ( $this_limit =~ /mc/ ) {
1033             $group_OR_limits .= " or " if $group_OR_limits;
1034             $limit_desc      .= " or " if $group_OR_limits;
1035             $group_OR_limits .= "$this_limit";
1036             $limit_cgi       .= "&limit=$this_limit";
1037             $limit_desc      .= " $this_limit";
1038         }
1039
1040         # Regular old limits
1041         else {
1042             $limit .= " and " if $limit || $query;
1043             $limit      .= "$this_limit";
1044             $limit_cgi  .= "&limit=$this_limit";
1045             $limit_desc .= " $this_limit";
1046         }
1047     }
1048     if ($group_OR_limits) {
1049         $limit .= " and " if ( $query || $limit );
1050         $limit .= "($group_OR_limits)";
1051     }
1052     if ($availability_limit) {
1053         $limit .= " and " if ( $query || $limit );
1054         $limit .= "($availability_limit)";
1055     }
1056
1057     # Normalize the query and limit strings
1058     $query =~ s/:/=/g;
1059     $limit =~ s/:/=/g;
1060     for ( $query, $query_desc, $limit, $limit_desc ) {
1061         $_ =~ s/  / /g;    # remove extra spaces
1062         $_ =~ s/^ //g;     # remove any beginning spaces
1063         $_ =~ s/ $//g;     # remove any ending spaces
1064         $_ =~ s/==/=/g;    # remove double == from query
1065
1066     }
1067     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1068
1069     # append the limit to the query
1070     $query .= " " . $limit;
1071
1072     # Warnings if DEBUG
1073     if ($DEBUG) {
1074         warn "QUERY:" . $query;
1075         warn "QUERY CGI:" . $query_cgi;
1076         warn "QUERY DESC:" . $query_desc;
1077         warn "LIMIT:" . $limit;
1078         warn "LIMIT CGI:" . $limit_cgi;
1079         warn "LIMIT DESC:" . $limit_desc;
1080         warn "---------";
1081         warn "Leave buildQuery";
1082         warn "---------";
1083     }
1084     return (
1085         undef,              $query, $simple_query, $query_cgi,
1086         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1087         $stopwords_removed, $query_type
1088     );
1089 }
1090
1091 =head2 searchResults
1092
1093 Format results in a form suitable for passing to the template
1094
1095 =cut
1096
1097 # IMO this subroutine is pretty messy still -- it's responsible for
1098 # building the HTML output for the template
1099 sub searchResults {
1100     my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1101     my $dbh = C4::Context->dbh;
1102     my $toggle;
1103     my $even = 1;
1104     my @newresults;
1105
1106     # add search-term highlighting via <span>s on the search terms
1107     my $span_terms_hashref;
1108     for my $span_term ( split( / /, $searchdesc ) ) {
1109         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1110         $span_terms_hashref->{$span_term}++;
1111     }
1112
1113     #Build branchnames hash
1114     #find branchname
1115     #get branch information.....
1116     my %branches;
1117     my $bsth =
1118       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1119       ;    # FIXME : use C4::Koha::GetBranches
1120     $bsth->execute();
1121     while ( my $bdata = $bsth->fetchrow_hashref ) {
1122         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1123     }
1124     my %locations;
1125     my $lsch =
1126       $dbh->prepare(
1127 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1128       );
1129     $lsch->execute();
1130     while ( my $ldata = $lsch->fetchrow_hashref ) {
1131         $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1132     }
1133
1134     #Build itemtype hash
1135     #find itemtype & itemtype image
1136     my %itemtypes;
1137     $bsth =
1138       $dbh->prepare(
1139         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1140       );
1141     $bsth->execute();
1142     while ( my $bdata = $bsth->fetchrow_hashref ) {
1143         $itemtypes{ $bdata->{'itemtype'} }->{description} =
1144           $bdata->{'description'};
1145         $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1146         $itemtypes{ $bdata->{'itemtype'} }->{summary}  = $bdata->{'summary'};
1147         $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1148           $bdata->{'notforloan'};
1149     }
1150
1151     #search item field code
1152     my $sth =
1153       $dbh->prepare(
1154 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1155       );
1156     $sth->execute;
1157     my ($itemtag) = $sth->fetchrow;
1158
1159     # get notforloan authorised value list
1160     $sth =
1161       $dbh->prepare(
1162 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1163       );
1164     $sth->execute;
1165     my ($notforloan_authorised_value) = $sth->fetchrow;
1166
1167     ## find column names of items related to MARC
1168     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1169     $sth2->execute;
1170     my %subfieldstosearch;
1171     while ( ( my $column ) = $sth2->fetchrow ) {
1172         my ( $tagfield, $tagsubfield ) =
1173           &GetMarcFromKohaField( "items." . $column, "" );
1174         $subfieldstosearch{$column} = $tagsubfield;
1175     }
1176
1177     # handle which records to actually retrieve
1178     my $times;
1179     if ( $hits && $offset + $results_per_page <= $hits ) {
1180         $times = $offset + $results_per_page;
1181     }
1182     else {
1183         $times = $hits;
1184     }
1185
1186     # loop through all of the records we've retrieved
1187     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1188         my $marcrecord;
1189         $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1190         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1191         $oldbiblio->{result_number} = $i + 1;
1192
1193         # add imageurl to itemtype if there is one
1194         if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1195             $oldbiblio->{imageurl} =
1196               $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1197             $oldbiblio->{description} =
1198               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1199         }
1200         else {
1201             $oldbiblio->{imageurl} =
1202               getitemtypeimagesrc() . "/"
1203               . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1204               if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1205             $oldbiblio->{description} =
1206               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1207         }
1208
1209  # Build summary if there is one (the summary is defined in the itemtypes table)
1210  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1211         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1212             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1213             my @fields  = $marcrecord->fields();
1214             foreach my $field (@fields) {
1215                 my $tag      = $field->tag();
1216                 my $tagvalue = $field->as_string();
1217                 $summary =~
1218                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1219                 unless ( $tag < 10 ) {
1220                     my @subf = $field->subfields;
1221                     for my $i ( 0 .. $#subf ) {
1222                         my $subfieldcode  = $subf[$i][0];
1223                         my $subfieldvalue = $subf[$i][1];
1224                         my $tagsubf       = $tag . $subfieldcode;
1225                         $summary =~
1226 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1227                     }
1228                 }
1229             }
1230             # FIXME: yuk
1231             $summary =~ s/\[(.*?)]//g;
1232             $summary =~ s/\n/<br>/g;
1233             $oldbiblio->{summary} = $summary;
1234         }
1235
1236 # Add search-term highlighting to the whole record where they match using <span>s
1237         my $searchhighlightblob;
1238         for my $highlight_field ( $marcrecord->fields ) {
1239
1240   # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1241             next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1242             my $match;
1243             my $field = $highlight_field->as_string();
1244             for my $term ( keys %$span_terms_hashref ) {
1245                 if ( ( $field =~ /$term/i ) && ( length($term) > 3 ) ) {
1246                     $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1247                     $match++;
1248                 }
1249             }
1250
1251             # FIXME: we might want to limit the size of these fields if we
1252             # want to get fancy
1253             $searchhighlightblob .= $field . " ... " if $match;
1254         }
1255         $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1256
1257 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1258         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1259
1260         # Add search-term highlighting to the title, subtitle, etc. fields
1261         for my $term ( keys %$span_terms_hashref ) {
1262             my $old_term = $term;
1263             if ( length($term) > 3 ) {
1264                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1265                 $oldbiblio->{'title'} =~
1266                   s/$term/<span class=\"term\">$&<\/span>/gi;
1267                 $oldbiblio->{'subtitle'} =~
1268                   s/$term/<span class=\"term\">$&<\/span>/gi;
1269                 $oldbiblio->{'author'} =~
1270                   s/$term/<span class=\"term\">$&<\/span>/gi;
1271                 $oldbiblio->{'publishercode'} =~
1272                   s/$term/<span class=\"term\">$&<\/span>/gi;
1273                 $oldbiblio->{'place'} =~
1274                   s/$term/<span class=\"term\">$&<\/span>/gi;
1275                 $oldbiblio->{'pages'} =~
1276                   s/$term/<span class=\"term\">$&<\/span>/gi;
1277                 $oldbiblio->{'notes'} =~
1278                   s/$term/<span class=\"term\">$&<\/span>/gi;
1279                 $oldbiblio->{'size'} =~
1280                   s/$term/<span class=\"term\">$&<\/span>/gi;
1281             }
1282         }
1283
1284         # FIXME:
1285         # surely there's a better way to handle this
1286         if ( $i % 2 ) {
1287             $toggle = "#ffffcc";
1288         }
1289         else {
1290             $toggle = "white";
1291         }
1292         $oldbiblio->{'toggle'} = $toggle;
1293
1294         # Pull out the items fields
1295         my @fields = $marcrecord->field($itemtag);
1296
1297         # Setting item statuses for display
1298         my @available_items_loop;
1299         my @onloan_items_loop;
1300         my @other_items_loop;
1301
1302         my $available_items;
1303         my $onloan_items;
1304         my $other_items;
1305
1306         my $ordered_count     = 0;
1307         my $available_count   = 0;
1308         my $onloan_count      = 0;
1309         my $longoverdue_count = 0;
1310         my $other_count       = 0;
1311         my $wthdrawn_count    = 0;
1312         my $itemlost_count    = 0;
1313         my $itembinding_count = 0;
1314         my $itemdamaged_count = 0;
1315         my $can_place_holds   = 0;
1316         my $items_count       = scalar(@fields);
1317         my $items_counter;
1318         my $maxitems =
1319           ( C4::Context->preference('maxItemsinSearchResults') )
1320           ? C4::Context->preference('maxItemsinSearchResults') - 1
1321           : 1;
1322
1323         # loop through every item
1324         foreach my $field (@fields) {
1325             my $item;
1326             $items_counter++;
1327
1328             # populate the items hash
1329             foreach my $code ( keys %subfieldstosearch ) {
1330                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1331             }
1332
1333       # set item's branch name, use homebranch first, fall back to holdingbranch
1334             if ( $item->{'homebranch'} ) {
1335                 $item->{'branchname'} = $branches{ $item->{homebranch} };
1336             }
1337
1338             # Last resort
1339             elsif ( $item->{'holdingbranch'} ) {
1340                 $item->{'branchname'} = $branches{ $item->{holdingbranch} };
1341             }
1342
1343 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1344             if ( $item->{onloan} ) {
1345                 $onloan_count++;
1346                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{due_date} = format_date( $item->{onloan} );
1347                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{count}++ if $item->{'homebranch'};
1348                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{branchname} = $item->{'branchname'};
1349                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{location} = $locations{ $item->{location} };
1350                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{itemcallnumber} = $item->{itemcallnumber};
1351                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1352                 # if something's checked out and lost, mark it as 'long overdue'
1353                 if ( $item->{itemlost} ) {
1354                     $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1355                     $longoverdue_count++;
1356                 }
1357
1358                 # can place holds as long as this item isn't lost
1359                 else {
1360                     $can_place_holds = 1;
1361                 }
1362             }
1363
1364          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1365             else {
1366
1367                 # item is on order
1368                 if ( $item->{notforloan} == -1 ) {
1369                     $ordered_count++;
1370                 }
1371
1372                 # item is withdrawn, lost or damaged
1373                 if (   $item->{wthdrawn}
1374                     || $item->{itemlost}
1375                     || $item->{damaged}
1376                     || $item->{notforloan} )
1377                 {
1378                     $wthdrawn_count++    if $item->{wthdrawn};
1379                     $itemlost_count++    if $item->{itemlost};
1380                     $itemdamaged_count++ if $item->{damaged};
1381                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1382                     $other_count++;
1383
1384                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1385                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1386                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1387                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1388                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1389                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1390                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1391                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1392                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1393                 }
1394
1395                 # item is available
1396                 else {
1397                     $can_place_holds = 1;
1398                     $available_count++;
1399                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1400                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1401                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1402                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1403                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1404                 }
1405             }
1406         }    # notforloan, item level and biblioitem level
1407         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1408         my $maxitems =
1409           ( C4::Context->preference('maxItemsinSearchResults') )
1410           ? C4::Context->preference('maxItemsinSearchResults') - 1
1411           : 1;
1412         for my $key ( sort keys %$onloan_items ) {
1413             $onloanitemscount++;
1414             push @onloan_items_loop, $onloan_items->{$key}
1415               unless $onloanitemscount > $maxitems;
1416         }
1417         for my $key ( sort keys %$other_items ) {
1418             $otheritemscount++;
1419             push @other_items_loop, $other_items->{$key}
1420               unless $otheritemscount > $maxitems;
1421         }
1422         for my $key ( sort keys %$available_items ) {
1423             $availableitemscount++;
1424             push @available_items_loop, $available_items->{$key}
1425               unless $availableitemscount > $maxitems;
1426         }
1427
1428 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1429         $can_place_holds = 0
1430           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1431         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1432         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1433         $oldbiblio->{items_count}          = $items_count;
1434         $oldbiblio->{available_items_loop} = \@available_items_loop;
1435         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1436         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1437         $oldbiblio->{availablecount}       = $available_count;
1438         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1439         $oldbiblio->{onloancount}          = $onloan_count;
1440         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1441         $oldbiblio->{othercount}           = $other_count;
1442         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1443         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1444         $oldbiblio->{itemlostcount}        = $itemlost_count;
1445         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1446         $oldbiblio->{orderedcount}         = $ordered_count;
1447         $oldbiblio->{isbn} =~
1448           s/-//g;    # deleting - in isbn to enable amazon content
1449         push( @newresults, $oldbiblio );
1450     }
1451     return @newresults;
1452 }
1453
1454 #----------------------------------------------------------------------
1455 #
1456 # Non-Zebra GetRecords#
1457 #----------------------------------------------------------------------
1458
1459 =head2 NZgetRecords
1460
1461   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1462
1463 =cut
1464
1465 sub NZgetRecords {
1466     my (
1467         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1468         $results_per_page, $offset,       $expanded_facet, $branches,
1469         $query_type,       $scan
1470     ) = @_;
1471     warn "query =$query" if $DEBUG;
1472     my $result = NZanalyse($query);
1473     warn "results =$result" if $DEBUG;
1474     return ( undef,
1475         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1476         undef );
1477 }
1478
1479 =head2 NZanalyse
1480
1481   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1482   the list is built from an inverted index in the nozebra SQL table
1483   note that title is here only for convenience : the sorting will be very fast when requested on title
1484   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1485
1486 =cut
1487
1488 sub NZanalyse {
1489     my ( $string, $server ) = @_;
1490     warn "---------"       if $DEBUG;
1491     warn "Enter NZanalyse" if $DEBUG;
1492     warn "---------"       if $DEBUG;
1493
1494  # $server contains biblioserver or authorities, depending on what we search on.
1495  #warn "querying : $string on $server";
1496     $server = 'biblioserver' unless $server;
1497
1498 # if we have a ", replace the content to discard temporarily any and/or/not inside
1499     my $commacontent;
1500     if ( $string =~ /"/ ) {
1501         $string =~ s/"(.*?)"/__X__/;
1502         $commacontent = $1;
1503         warn "commacontent : $commacontent" if $DEBUG;
1504     }
1505
1506 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1507 # then, call again NZanalyse with $left and $right
1508 # (recursive until we find a leaf (=> something without and/or/not)
1509 # delete repeated operator... Would then go in infinite loop
1510     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1511     }
1512
1513     #process parenthesis before.
1514     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1515         my $left     = $1;
1516         my $right    = $4;
1517         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1518         warn
1519 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1520           if $DEBUG;
1521         my $leftresult = NZanalyse( $left, $server );
1522         if ($operator) {
1523             my $rightresult = NZanalyse( $right, $server );
1524
1525             # OK, we have the results for right and left part of the query
1526             # depending of operand, intersect, union or exclude both lists
1527             # to get a result list
1528             if ( $operator eq ' and ' ) {
1529                 my @leftresult = split /;/, $leftresult;
1530                 warn " @leftresult / $rightresult \n" if $DEBUG;
1531
1532                 #             my @rightresult = split /;/,$leftresult;
1533                 my $finalresult;
1534
1535 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1536 # the result is stored twice, to have the same weight for AND than OR.
1537 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1538 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1539                 foreach (@leftresult) {
1540                     my $value = $_;
1541                     my $countvalue;
1542                     ( $value, $countvalue ) = ( $1, $2 )
1543                       if $value =~ m/(.*)-(\d+)$/;
1544                     if ( $rightresult =~ /$value-(\d+);/ ) {
1545                         $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1546                         $finalresult .=
1547                           "$value-$countvalue;$value-$countvalue;";
1548                     }
1549                 }
1550                 warn " $finalresult \n" if $DEBUG;
1551                 return $finalresult;
1552             }
1553             elsif ( $operator eq ' or ' ) {
1554
1555                 # just merge the 2 strings
1556                 return $leftresult . $rightresult;
1557             }
1558             elsif ( $operator eq ' not ' ) {
1559                 my @leftresult = split /;/, $leftresult;
1560
1561                 #             my @rightresult = split /;/,$leftresult;
1562                 my $finalresult;
1563                 foreach (@leftresult) {
1564                     my $value = $_;
1565                     $value = $1 if $value =~ m/(.*)-\d+$/;
1566                     unless ( $rightresult =~ "$value-" ) {
1567                     }
1568                 }
1569                 return $finalresult;
1570             }
1571             else {
1572
1573 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1574                 return $leftresult;
1575                 exit;
1576             }
1577         }
1578     }
1579     warn "string :" . $string if $DEBUG;
1580     $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1581     my $left     = $1;
1582     my $right    = $3;
1583     my $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1584     warn "dealing w/parenthesis. left :$left operator:$operator right:$right"
1585       if $DEBUG;
1586
1587     # it's not a leaf, we have a and/or/not
1588     if ($operator) {
1589
1590         # reintroduce comma content if needed
1591         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1592         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1593         warn "node : $left / $operator / $right\n" if $DEBUG;
1594         my $leftresult  = NZanalyse( $left,  $server );
1595         my $rightresult = NZanalyse( $right, $server );
1596
1597         # OK, we have the results for right and left part of the query
1598         # depending of operand, intersect, union or exclude both lists
1599         # to get a result list
1600         if ( $operator eq ' and ' ) {
1601             my @leftresult = split /;/, $leftresult;
1602
1603             #             my @rightresult = split /;/,$leftresult;
1604             my $finalresult;
1605
1606 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1607 # the result is stored twice, to have the same weight for AND than OR.
1608 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1609 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1610             foreach (@leftresult) {
1611                 if ( $rightresult =~ "$_;" ) {
1612                     $finalresult .= "$_;$_;";
1613                 }
1614             }
1615             return $finalresult;
1616         }
1617         elsif ( $operator eq ' or ' ) {
1618
1619             # just merge the 2 strings
1620             return $leftresult . $rightresult;
1621         }
1622         elsif ( $operator eq ' not ' ) {
1623             my @leftresult = split /;/, $leftresult;
1624
1625             #             my @rightresult = split /;/,$leftresult;
1626             my $finalresult;
1627             foreach (@leftresult) {
1628                 unless ( $rightresult =~ "$_;" ) {
1629                     $finalresult .= "$_;";
1630                 }
1631             }
1632             return $finalresult;
1633         }
1634         else {
1635
1636 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1637             die "error : operand unknown : $operator for $string";
1638         }
1639
1640         # it's a leaf, do the real SQL query and return the result
1641     }
1642     else {
1643         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1644         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1645         warn "leaf:$string" if $DEBUG;
1646
1647         # parse the string in in operator/operand/value again
1648         $string =~ /(.*)(>=|<=)(.*)/;
1649         my $left     = $1;
1650         my $operator = $2;
1651         my $right    = $3;
1652         warn "handling leaf... left:$left operator:$operator right:$right"
1653           if $DEBUG;
1654         unless ($operator) {
1655             $string =~ /(.*)(>|<|=)(.*)/;
1656             $left     = $1;
1657             $operator = $2;
1658             $right    = $3;
1659             warn
1660 "handling unless (operator)... left:$left operator:$operator right:$right"
1661               if $DEBUG;
1662         }
1663         my $results;
1664
1665 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1666         $left =~ s/[ ,].*$//;
1667
1668         # automatic replace for short operators
1669         $left = 'title'            if $left =~ '^ti$';
1670         $left = 'author'           if $left =~ '^au$';
1671         $left = 'publisher'        if $left =~ '^pb$';
1672         $left = 'subject'          if $left =~ '^su$';
1673         $left = 'koha-Auth-Number' if $left =~ '^an$';
1674         $left = 'keyword'          if $left =~ '^kw$';
1675         if ( $operator && $left ne 'keyword' ) {
1676
1677             #do a specific search
1678             my $dbh = C4::Context->dbh;
1679             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1680             my $sth =
1681               $dbh->prepare(
1682 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1683               );
1684             warn "$left / $operator / $right\n";
1685
1686             # split each word, query the DB and build the biblionumbers result
1687             #sanitizing leftpart
1688             $left =~ s/^\s+|\s+$//;
1689             foreach ( split / /, $right ) {
1690                 my $biblionumbers;
1691                 $_ =~ s/^\s+|\s+$//;
1692                 next unless $_;
1693                 warn "EXECUTE : $server, $left, $_";
1694                 $sth->execute( $server, $left, $_ )
1695                   or warn "execute failed: $!";
1696                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1697
1698 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1699 # otherwise, fill the result
1700                     $biblionumbers .= $line
1701                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1702                     warn "result : $value "
1703                       . ( $right  =~ /\d/ ) . "=="
1704                       . ( !$value =~ /\d/ );         #= $line";
1705                 }
1706
1707 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1708                 if ($results) {
1709                     my @leftresult = split /;/, $biblionumbers;
1710                     my $temp;
1711                     foreach my $entry (@leftresult)
1712                     {    # $_ contains biblionumber,title-weight
1713                             # remove weight at the end
1714                         my $cleaned = $entry;
1715                         $cleaned =~ s/-\d*$//;
1716
1717                    # if the entry already in the hash, take it & increase weight
1718                         warn "===== $cleaned =====" if $DEBUG;
1719                         if ( $results =~ "$cleaned" ) {
1720                             $temp .= "$entry;$entry;";
1721                             warn "INCLUDING $entry" if $DEBUG;
1722                         }
1723                     }
1724                     $results = $temp;
1725                 }
1726                 else {
1727                     $results = $biblionumbers;
1728                 }
1729             }
1730         }
1731         else {
1732
1733       #do a complete search (all indexes), if index='kw' do complete search too.
1734             my $dbh = C4::Context->dbh;
1735             my $sth =
1736               $dbh->prepare(
1737 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1738               );
1739
1740             # split each word, query the DB and build the biblionumbers result
1741             foreach ( split / /, $string ) {
1742                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1743                 warn "search on all indexes on $_" if $DEBUG;
1744                 my $biblionumbers;
1745                 next unless $_;
1746                 $sth->execute( $server, $_ );
1747                 while ( my $line = $sth->fetchrow ) {
1748                     $biblionumbers .= $line;
1749                 }
1750
1751 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1752                 if ($results) {
1753                     warn "RES for $_ = $biblionumbers" if $DEBUG;
1754                     my @leftresult = split /;/, $biblionumbers;
1755                     my $temp;
1756                     foreach my $entry (@leftresult)
1757                     {    # $_ contains biblionumber,title-weight
1758                             # remove weight at the end
1759                         my $cleaned = $entry;
1760                         $cleaned =~ s/-\d*$//;
1761
1762                # if the entry already in the hash, take it & increase weight
1763                #                          warn "===== $cleaned =====" if $DEBUG;
1764                         if ( $results =~ "$cleaned" ) {
1765                             $temp .= "$entry;$entry;";
1766
1767                #                              warn "INCLUDING $entry" if $DEBUG;
1768                         }
1769                     }
1770                     $results = $temp;
1771                 }
1772                 else {
1773                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1774                     $results = $biblionumbers;
1775                 }
1776             }
1777         }
1778         warn "return : $results for LEAF : $string" if $DEBUG;
1779         return $results;
1780     }
1781     warn "---------"       if $DEBUG;
1782     warn "Leave NZanalyse" if $DEBUG;
1783     warn "---------"       if $DEBUG;
1784 }
1785
1786 =head2 NZorder
1787
1788   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1789
1790   TODO :: Description
1791
1792 =cut
1793
1794 sub NZorder {
1795     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1796     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1797
1798     # order title asc by default
1799     #     $ordering = '1=36 <i' unless $ordering;
1800     $results_per_page = 20 unless $results_per_page;
1801     $offset           = 0  unless $offset;
1802     my $dbh = C4::Context->dbh;
1803
1804     #
1805     # order by POPULARITY
1806     #
1807     if ( $ordering =~ /popularity/ ) {
1808         my %result;
1809         my %popularity;
1810
1811         # popularity is not in MARC record, it's builded from a specific query
1812         my $sth =
1813           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1814         foreach ( split /;/, $biblionumbers ) {
1815             my ( $biblionumber, $title ) = split /,/, $_;
1816             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1817             $sth->execute($biblionumber);
1818             my $popularity = $sth->fetchrow || 0;
1819
1820 # hint : the key is popularity.title because we can have
1821 # many results with the same popularity. In this cas, sub-ordering is done by title
1822 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1823 # (un-frequent, I agree, but we won't forget anything that way ;-)
1824             $popularity{ sprintf( "%10d", $popularity ) . $title
1825                   . $biblionumber } = $biblionumber;
1826         }
1827
1828     # sort the hash and return the same structure as GetRecords (Zebra querying)
1829         my $result_hash;
1830         my $numbers = 0;
1831         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1832             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1833                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1834                   $result{ $popularity{$key} }->as_usmarc();
1835             }
1836         }
1837         else {                                    # sort popularity ASC
1838             foreach my $key ( sort ( keys %popularity ) ) {
1839                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1840                   $result{ $popularity{$key} }->as_usmarc();
1841             }
1842         }
1843         my $finalresult = ();
1844         $result_hash->{'hits'}         = $numbers;
1845         $finalresult->{'biblioserver'} = $result_hash;
1846         return $finalresult;
1847
1848         #
1849         # ORDER BY author
1850         #
1851     }
1852     elsif ( $ordering =~ /author/ ) {
1853         my %result;
1854         foreach ( split /;/, $biblionumbers ) {
1855             my ( $biblionumber, $title ) = split /,/, $_;
1856             my $record = GetMarcBiblio($biblionumber);
1857             my $author;
1858             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1859                 $author = $record->subfield( '200', 'f' );
1860                 $author = $record->subfield( '700', 'a' ) unless $author;
1861             }
1862             else {
1863                 $author = $record->subfield( '100', 'a' );
1864             }
1865
1866 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1867 # and we don't want to get only 1 result for each of them !!!
1868             $result{ $author . $biblionumber } = $record;
1869         }
1870
1871     # sort the hash and return the same structure as GetRecords (Zebra querying)
1872         my $result_hash;
1873         my $numbers = 0;
1874         if ( $ordering eq 'author_za' ) {    # sort by author desc
1875             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1876                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1877                   $result{$key}->as_usmarc();
1878             }
1879         }
1880         else {                               # sort by author ASC
1881             foreach my $key ( sort ( keys %result ) ) {
1882                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1883                   $result{$key}->as_usmarc();
1884             }
1885         }
1886         my $finalresult = ();
1887         $result_hash->{'hits'}         = $numbers;
1888         $finalresult->{'biblioserver'} = $result_hash;
1889         return $finalresult;
1890
1891         #
1892         # ORDER BY callnumber
1893         #
1894     }
1895     elsif ( $ordering =~ /callnumber/ ) {
1896         my %result;
1897         foreach ( split /;/, $biblionumbers ) {
1898             my ( $biblionumber, $title ) = split /,/, $_;
1899             my $record = GetMarcBiblio($biblionumber);
1900             my $callnumber;
1901             my ( $callnumber_tag, $callnumber_subfield ) =
1902               GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1903             ( $callnumber_tag, $callnumber_subfield ) =
1904               GetMarcFromKohaField('biblioitems.callnumber')
1905               unless $callnumber_tag;
1906             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1907                 $callnumber = $record->subfield( '200', 'f' );
1908             }
1909             else {
1910                 $callnumber = $record->subfield( '100', 'a' );
1911             }
1912
1913 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1914 # and we don't want to get only 1 result for each of them !!!
1915             $result{ $callnumber . $biblionumber } = $record;
1916         }
1917
1918     # sort the hash and return the same structure as GetRecords (Zebra querying)
1919         my $result_hash;
1920         my $numbers = 0;
1921         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1922             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1923                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1924                   $result{$key}->as_usmarc();
1925             }
1926         }
1927         else {                                     # sort by title ASC
1928             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1929                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1930                   $result{$key}->as_usmarc();
1931             }
1932         }
1933         my $finalresult = ();
1934         $result_hash->{'hits'}         = $numbers;
1935         $finalresult->{'biblioserver'} = $result_hash;
1936         return $finalresult;
1937     }
1938     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1939         my %result;
1940         foreach ( split /;/, $biblionumbers ) {
1941             my ( $biblionumber, $title ) = split /,/, $_;
1942             my $record = GetMarcBiblio($biblionumber);
1943             my ( $publicationyear_tag, $publicationyear_subfield ) =
1944               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1945             my $publicationyear =
1946               $record->subfield( $publicationyear_tag,
1947                 $publicationyear_subfield );
1948
1949 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1950 # and we don't want to get only 1 result for each of them !!!
1951             $result{ $publicationyear . $biblionumber } = $record;
1952         }
1953
1954     # sort the hash and return the same structure as GetRecords (Zebra querying)
1955         my $result_hash;
1956         my $numbers = 0;
1957         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
1958             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1959                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1960                   $result{$key}->as_usmarc();
1961             }
1962         }
1963         else {                                 # sort by pub year ASC
1964             foreach my $key ( sort ( keys %result ) ) {
1965                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1966                   $result{$key}->as_usmarc();
1967             }
1968         }
1969         my $finalresult = ();
1970         $result_hash->{'hits'}         = $numbers;
1971         $finalresult->{'biblioserver'} = $result_hash;
1972         return $finalresult;
1973
1974         #
1975         # ORDER BY title
1976         #
1977     }
1978     elsif ( $ordering =~ /title/ ) {
1979
1980 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1981         my %result;
1982         foreach ( split /;/, $biblionumbers ) {
1983             my ( $biblionumber, $title ) = split /,/, $_;
1984
1985 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1986 # and we don't want to get only 1 result for each of them !!!
1987 # hint & speed improvement : we can order without reading the record
1988 # so order, and read records only for the requested page !
1989             $result{ $title . $biblionumber } = $biblionumber;
1990         }
1991
1992     # sort the hash and return the same structure as GetRecords (Zebra querying)
1993         my $result_hash;
1994         my $numbers = 0;
1995         if ( $ordering eq 'title_az' ) {    # sort by title desc
1996             foreach my $key ( sort ( keys %result ) ) {
1997                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1998             }
1999         }
2000         else {                              # sort by title ASC
2001             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2002                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2003             }
2004         }
2005
2006         # limit the $results_per_page to result size if it's more
2007         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2008
2009         # for the requested page, replace biblionumber by the complete record
2010         # speed improvement : avoid reading too much things
2011         for (
2012             my $counter = $offset ;
2013             $counter <= $offset + $results_per_page ;
2014             $counter++
2015           )
2016         {
2017             $result_hash->{'RECORDS'}[$counter] =
2018               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2019         }
2020         my $finalresult = ();
2021         $result_hash->{'hits'}         = $numbers;
2022         $finalresult->{'biblioserver'} = $result_hash;
2023         return $finalresult;
2024     }
2025     else {
2026
2027 #
2028 # order by ranking
2029 #
2030 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2031         my %result;
2032         my %count_ranking;
2033         foreach ( split /;/, $biblionumbers ) {
2034             my ( $biblionumber, $title ) = split /,/, $_;
2035             $title =~ /(.*)-(\d)/;
2036
2037             # get weight
2038             my $ranking = $2;
2039
2040 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2041 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2042 # biblio N has ranking = 6
2043             $count_ranking{$biblionumber} += $ranking;
2044         }
2045
2046 # build the result by "inverting" the count_ranking hash
2047 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2048 #         warn "counting";
2049         foreach ( keys %count_ranking ) {
2050             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2051         }
2052
2053     # sort the hash and return the same structure as GetRecords (Zebra querying)
2054         my $result_hash;
2055         my $numbers = 0;
2056         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2057             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2058         }
2059
2060         # limit the $results_per_page to result size if it's more
2061         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2062
2063         # for the requested page, replace biblionumber by the complete record
2064         # speed improvement : avoid reading too much things
2065         for (
2066             my $counter = $offset ;
2067             $counter <= $offset + $results_per_page ;
2068             $counter++
2069           )
2070         {
2071             $result_hash->{'RECORDS'}[$counter] =
2072               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2073               if $result_hash->{'RECORDS'}[$counter];
2074         }
2075         my $finalresult = ();
2076         $result_hash->{'hits'}         = $numbers;
2077         $finalresult->{'biblioserver'} = $result_hash;
2078         return $finalresult;
2079     }
2080 }
2081
2082 =head2 ModBiblios
2083
2084 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2085
2086 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2087 test parameter if set donot perform change to records in database.
2088
2089 =over 2
2090
2091 =item C<input arg:>
2092
2093     * $listbiblios is an array ref to marcrecords to be changed
2094     * $tagsubfield is the reference of the subfield to change.
2095     * $initvalue is the value to search the record for
2096     * $targetvalue is the value to set the subfield to
2097     * $test is to be set only not to perform changes in database.
2098
2099 =item C<Output arg:>
2100     * $countchanged counts all the changes performed.
2101     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2102
2103 =item C<usage in the script:>
2104
2105 =back
2106
2107 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2108 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2109 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2110
2111 =cut
2112
2113 sub ModBiblios {
2114     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2115     my $countmatched;
2116     my @unmatched;
2117     my ( $tag, $subfield ) = ( $1, $2 )
2118       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2119     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2120         $tag = $tag . $subfield;
2121         undef $subfield;
2122     }
2123     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
2124     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2125     foreach my $usmarc (@$listbiblios) {
2126         my $record;
2127         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2128         my $biblionumber;
2129         if ($@) {
2130
2131             # usmarc is not a valid usmarc May be a biblionumber
2132             if ( $tag eq $itemtag ) {
2133                 my $bib = GetBiblioFromItemNumber($usmarc);
2134                 $record = GetMarcItem( $bib->{'biblionumber'}, $usmarc );
2135                 $biblionumber = $bib->{'biblionumber'};
2136             }
2137             else {
2138                 $record       = GetMarcBiblio($usmarc);
2139                 $biblionumber = $usmarc;
2140             }
2141         }
2142         else {
2143             if ( $bntag >= 010 ) {
2144                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2145             }
2146             else {
2147                 $biblionumber = $record->field($bntag)->data;
2148             }
2149         }
2150
2151         #GetBiblionumber is to be written.
2152         #Could be replaced by TransformMarcToKoha (But Would be longer)
2153         if ( $record->field($tag) ) {
2154             my $modify = 0;
2155             foreach my $field ( $record->field($tag) ) {
2156                 if ($subfield) {
2157                     if (
2158                         $field->delete_subfield(
2159                             'code'  => $subfield,
2160                             'match' => qr($initvalue)
2161                         )
2162                       )
2163                     {
2164                         $countmatched++;
2165                         $modify = 1;
2166                         $field->update( $subfield, $targetvalue )
2167                           if ($targetvalue);
2168                     }
2169                 }
2170                 else {
2171                     if ( $tag >= 010 ) {
2172                         if ( $field->delete_field($field) ) {
2173                             $countmatched++;
2174                             $modify = 1;
2175                         }
2176                     }
2177                     else {
2178                         $field->data = $targetvalue
2179                           if ( $field->data =~ qr($initvalue) );
2180                     }
2181                 }
2182             }
2183
2184             #       warn $record->as_formatted;
2185             if ($modify) {
2186                 ModBiblio( $record, $biblionumber,
2187                     GetFrameworkCode($biblionumber) )
2188                   unless ($test);
2189             }
2190             else {
2191                 push @unmatched, $biblionumber;
2192             }
2193         }
2194         else {
2195             push @unmatched, $biblionumber;
2196         }
2197     }
2198     return ( $countmatched, \@unmatched );
2199 }
2200
2201 END { }    # module clean-up code here (global destructor)
2202
2203 1;
2204 __END__
2205
2206 =head1 AUTHOR
2207
2208 Koha Developement team <info@koha.org>
2209
2210 =cut