C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30
  31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  32
  33 # set the version for version checking
  34 BEGIN {
  35     $VERSION = 3.01;
  36     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Search - Functions for searching the Koha catalog.
  42
  43 =head1 SYNOPSIS
  44
  45 See opac/opac-search.pl or catalogue/search.pl for example of usage
  46
  47 =head1 DESCRIPTION
  48
  49 This module provides searching functions for Koha's bibliographic databases
  50
  51 =head1 FUNCTIONS
  52
  53 =cut
  54
  55 @ISA    = qw(Exporter);
  56 @EXPORT = qw(
  57   &findseealso
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 findseealso($dbh,$fields);
  69
  70 C<$dbh> is a link to the DB handler.
  71
  72 use C4::Context;
  73 my $dbh =C4::Context->dbh;
  74
  75 C<$fields> is a reference to the fields array
  76
  77 This function modifies the @$fields array and adds related fields to search on.
  78
  79 FIXME: this function is probably deprecated in Koha 3
  80
  81 =cut
  82
  83 sub findseealso {
  84     my ( $dbh, $fields ) = @_;
  85     my $tagslib = GetMarcStructure(1);
  86     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  87         my ($tag)      = substr( @$fields[$i], 1, 3 );
  88         my ($subfield) = substr( @$fields[$i], 4, 1 );
  89         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  90           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  91     }
  92 }
  93
  94 =head2 FindDuplicate
  95
  96 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  97
  98 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  99
 100 =cut
 101
 102 sub FindDuplicate {
 103     my ($record) = @_;
 104     my $dbh = C4::Context->dbh;
 105     my $result = TransformMarcToKoha( $dbh, $record, '' );
 106     my $sth;
 107     my $query;
 108     my $search;
 109     my $type;
 110     my ( $biblionumber, $title );
 111
 112     # search duplicate on ISBN, easy and fast..
 113     # ... normalize first
 114     if ( $result->{isbn} ) {
 115         $result->{isbn} =~ s/\(.*$//;
 116         $result->{isbn} =~ s/\s+$//;
 117         $query = "isbn=$result->{isbn}";
 118     }
 119     else {
 120         $result->{title} =~ s /\\//g;
 121         $result->{title} =~ s /\"//g;
 122         $result->{title} =~ s /\(//g;
 123         $result->{title} =~ s /\)//g;
 124
 125         # FIXME: instead of removing operators, could just do
 126         # quotes around the value
 127         $result->{title} =~ s/(and|or|not)//g;
 128         $query = "ti,ext=$result->{title}";
 129         $query .= " and itemtype=$result->{itemtype}"
 130           if ( $result->{itemtype} );
 131         if   ( $result->{author} ) {
 132             $result->{author} =~ s /\\//g;
 133             $result->{author} =~ s /\"//g;
 134             $result->{author} =~ s /\(//g;
 135             $result->{author} =~ s /\)//g;
 136
 137             # remove valid operators
 138             $result->{author} =~ s/(and|or|not)//g;
 139             $query .= " and au,ext=$result->{author}";
 140         }
 141     }
 142
 143     # FIXME: add error handling
 144     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 145     my @results;
 146     foreach my $possible_duplicate_record (@$searchresults) {
 147         my $marcrecord =
 148           MARC::Record->new_from_usmarc($possible_duplicate_record);
 149         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 150
 151         # FIXME :: why 2 $biblionumber ?
 152         if ($result) {
 153             push @results, $result->{'biblionumber'};
 154             push @results, $result->{'title'};
 155         }
 156     }
 157     return @results;
 158 }
 159
 160 =head2 SimpleSearch
 161
 162 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 163
 164 This function provides a simple search API on the bibliographic catalog
 165
 166 =over 2
 167
 168 =item C<input arg:>
 169
 170     * $query can be a simple keyword or a complete CCL query
 171     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 172     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 173     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 174
 175
 176 =item C<Output:>
 177
 178     * $error is a empty unless an error is detected
 179     * \@results is an array of records.
 180     * $total_hits is the number of hits that would have been returned with no limit
 181
 182 =item C<usage in the script:>
 183
 184 =back
 185
 186 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 187
 188 if (defined $error) {
 189     $template->param(query_error => $error);
 190     warn "error: ".$error;
 191     output_html_with_http_headers $input, $cookie, $template->output;
 192     exit;
 193 }
 194
 195 my $hits = scalar @$marcresults;
 196 my @results;
 197
 198 for my $i (0..$hits) {
 199     my %resultsloop;
 200     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 201     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 202
 203     #build the hash for the template.
 204     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 205     $resultsloop{title}           = $biblio->{'title'};
 206     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 207     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 208     $resultsloop{author}          = $biblio->{'author'};
 209     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 210     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 211
 212     push @results, \%resultsloop;
 213 }
 214
 215 $template->param(result=>\@results);
 216
 217 =cut
 218
 219 sub SimpleSearch {
 220     my ( $query, $offset, $max_results, $servers )  = @_;
 221
 222     if ( C4::Context->preference('NoZebra') ) {
 223         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 224         my $search_result =
 225           (      $result->{hits}
 226               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 227         return ( undef, $search_result, scalar($search_result) );
 228     }
 229     else {
 230         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 231         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 232         my @results;
 233         my @zoom_queries;
 234         my @tmpresults;
 235         my @zconns;
 236         my $total_hits;
 237         return ( "No query entered", undef, undef ) unless $query;
 238
 239         # Initialize & Search Zebra
 240         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 241             eval {
 242                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 243                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 244                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 245
 246                 # error handling
 247                 my $error =
 248                     $zconns[$i]->errmsg() . " ("
 249                   . $zconns[$i]->errcode() . ") "
 250                   . $zconns[$i]->addinfo() . " "
 251                   . $zconns[$i]->diagset();
 252
 253                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 254             };
 255             if ($@) {
 256
 257                 # caught a ZOOM::Exception
 258                 my $error =
 259                     $@->message() . " ("
 260                   . $@->code() . ") "
 261                   . $@->addinfo() . " "
 262                   . $@->diagset();
 263                 warn $error;
 264                 return ( $error, undef, undef );
 265             }
 266         }
 267         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 268             my $event = $zconns[ $i - 1 ]->last_event();
 269             if ( $event == ZOOM::Event::ZEND ) {
 270
 271                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 272                 my $hits = $tmpresults[ $i - 1 ]->size();
 273                 $total_hits += $hits;
 274                 my $last_record = $hits;
 275                 if ( defined $max_results && $offset + $max_results < $hits ) {
 276                     $last_record  = $offset + $max_results;
 277                 }
 278
 279                 for my $j ( $first_record..$last_record ) {
 280                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 281                     push @results, $record;
 282                 }
 283             }
 284         }
 285
 286         foreach my $result (@tmpresults) {
 287             $result->destroy();
 288         }
 289         foreach my $zoom_query (@zoom_queries) {
 290             $zoom_query->destroy();
 291         }
 292
 293         return ( undef, \@results, $total_hits );
 294     }
 295 }
 296
 297 =head2 getRecords
 298
 299 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 300
 301         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 302         $results_per_page, $offset,       $expanded_facet, $branches,
 303         $query_type,       $scan
 304     );
 305
 306 The all singing, all dancing, multi-server, asynchronous, scanning,
 307 searching, record nabbing, facet-building
 308
 309 See verbse embedded documentation.
 310
 311 =cut
 312
 313 sub getRecords {
 314     my (
 315         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 316         $results_per_page, $offset,       $expanded_facet, $branches,
 317         $query_type,       $scan
 318     ) = @_;
 319
 320     my @servers = @$servers_ref;
 321     my @sort_by = @$sort_by_ref;
 322
 323     # Initialize variables for the ZOOM connection and results object
 324     my $zconn;
 325     my @zconns;
 326     my @results;
 327     my $results_hashref = ();
 328
 329     # Initialize variables for the faceted results objects
 330     my $facets_counter = ();
 331     my $facets_info    = ();
 332     my $facets         = getFacets();
 333
 334     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 335
 336     ### LOOP THROUGH THE SERVERS
 337     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 338         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 339
 340 # perform the search, create the results objects
 341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 342         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 343
 344         #$query_to_use = $simple_query if $scan;
 345         warn $simple_query if ( $scan and $DEBUG );
 346
 347         # Check if we've got a query_type defined, if so, use it
 348         eval {
 349             if ($query_type)
 350             {
 351                 if ( $query_type =~ /^ccl/ ) {
 352                     $query_to_use =~
 353                       s/\:/\=/g;    # change : to = last minute (FIXME)
 354                     $results[$i] =
 355                       $zconns[$i]->search(
 356                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 357                       );
 358                 }
 359                 elsif ( $query_type =~ /^cql/ ) {
 360                     $results[$i] =
 361                       $zconns[$i]->search(
 362                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 363                 }
 364                 elsif ( $query_type =~ /^pqf/ ) {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 368                 }
 369             }
 370             else {
 371                 if ($scan) {
 372                     $results[$i] =
 373                       $zconns[$i]->scan(
 374                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 375                       );
 376                 }
 377                 else {
 378                     $results[$i] =
 379                       $zconns[$i]->search(
 380                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 381                       );
 382                 }
 383             }
 384         };
 385         if ($@) {
 386             warn "WARNING: query problem with $query_to_use " . $@;
 387         }
 388
 389         # Concatenate the sort_by limits and pass them to the results object
 390         # Note: sort will override rank
 391         my $sort_by;
 392         foreach my $sort (@sort_by) {
 393             if ( $sort eq "author_az" ) {
 394                 $sort_by .= "1=1003 <i ";
 395             }
 396             elsif ( $sort eq "author_za" ) {
 397                 $sort_by .= "1=1003 >i ";
 398             }
 399             elsif ( $sort eq "popularity_asc" ) {
 400                 $sort_by .= "1=9003 <i ";
 401             }
 402             elsif ( $sort eq "popularity_dsc" ) {
 403                 $sort_by .= "1=9003 >i ";
 404             }
 405             elsif ( $sort eq "call_number_asc" ) {
 406                 $sort_by .= "1=20  <i ";
 407             }
 408             elsif ( $sort eq "call_number_dsc" ) {
 409                 $sort_by .= "1=20 >i ";
 410             }
 411             elsif ( $sort eq "pubdate_asc" ) {
 412                 $sort_by .= "1=31 <i ";
 413             }
 414             elsif ( $sort eq "pubdate_dsc" ) {
 415                 $sort_by .= "1=31 >i ";
 416             }
 417             elsif ( $sort eq "acqdate_asc" ) {
 418                 $sort_by .= "1=32 <i ";
 419             }
 420             elsif ( $sort eq "acqdate_dsc" ) {
 421                 $sort_by .= "1=32 >i ";
 422             }
 423             elsif ( $sort eq "title_az" ) {
 424                 $sort_by .= "1=4 <i ";
 425             }
 426             elsif ( $sort eq "title_za" ) {
 427                 $sort_by .= "1=4 >i ";
 428             }
 429             else {
 430                 warn "Ignoring unrecognized sort '$sort' requested";
 431             }
 432         }
 433         if ($sort_by) {
 434             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 435                 warn "WARNING sort $sort_by failed";
 436             }
 437         }
 438     }    # finished looping through servers
 439
 440     # The big moment: asynchronously retrieve results from all servers
 441     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 442         my $ev = $zconns[ $i - 1 ]->last_event();
 443         if ( $ev == ZOOM::Event::ZEND ) {
 444             next unless $results[ $i - 1 ];
 445             my $size = $results[ $i - 1 ]->size();
 446             if ( $size > 0 ) {
 447                 my $results_hash;
 448
 449                 # loop through the results
 450                 $results_hash->{'hits'} = $size;
 451                 my $times;
 452                 if ( $offset + $results_per_page <= $size ) {
 453                     $times = $offset + $results_per_page;
 454                 }
 455                 else {
 456                     $times = $size;
 457                 }
 458                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 459                     my $records_hash;
 460                     my $record;
 461                     my $facet_record;
 462
 463                     ## Check if it's an index scan
 464                     if ($scan) {
 465                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 466
 467                  # here we create a minimal MARC record and hand it off to the
 468                  # template just like a normal result ... perhaps not ideal, but
 469                  # it works for now
 470                         my $tmprecord = MARC::Record->new();
 471                         $tmprecord->encoding('UTF-8');
 472                         my $tmptitle;
 473                         my $tmpauthor;
 474
 475                 # the minimal record in author/title (depending on MARC flavour)
 476                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 477                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 478                             $tmprecord->append_fields($tmptitle);
 479                         } else {
 480                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 481                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 482                             $tmprecord->append_fields($tmptitle);
 483                             $tmprecord->append_fields($tmpauthor);
 484                         }
 485                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 486                     }
 487
 488                     # not an index scan
 489                     else {
 490                         $record = $results[ $i - 1 ]->record($j)->raw();
 491
 492                         # warn "RECORD $j:".$record;
 493                         $results_hash->{'RECORDS'}[$j] = $record;
 494
 495             # Fill the facets while we're looping, but only for the biblioserver
 496                         $facet_record = MARC::Record->new_from_usmarc($record)
 497                           if $servers[ $i - 1 ] =~ /biblioserver/;
 498
 499                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 500                         if ($facet_record) {
 501                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 502
 503                                 if ( $facets->[$k] ) {
 504                                     my @fields;
 505                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 506                                     {
 507                                         push @fields,
 508                                           $facet_record->field($tag);
 509                                     }
 510                                     for my $field (@fields) {
 511                                         my @subfields = $field->subfields();
 512                                         for my $subfield (@subfields) {
 513                                             my ( $code, $data ) = @$subfield;
 514                                             if ( $code eq
 515                                                 $facets->[$k]->{'subfield'} )
 516                                             {
 517                                                 $facets_counter->{ $facets->[$k]
 518                                                       ->{'link_value'} }
 519                                                   ->{$data}++;
 520                                             }
 521                                         }
 522                                     }
 523                                     $facets_info->{ $facets->[$k]
 524                                           ->{'link_value'} }->{'label_value'} =
 525                                       $facets->[$k]->{'label_value'};
 526                                     $facets_info->{ $facets->[$k]
 527                                           ->{'link_value'} }->{'expanded'} =
 528                                       $facets->[$k]->{'expanded'};
 529                                 }
 530                             }
 531                         }
 532                     }
 533                 }
 534                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 535             }
 536
 537             # warn "connection ", $i-1, ": $size hits";
 538             # warn $results[$i-1]->record(0)->render() if $size > 0;
 539
 540             # BUILD FACETS
 541             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 542                 for my $link_value (
 543                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 544                     keys %$facets_counter )
 545                 {
 546                     my $expandable;
 547                     my $number_of_facets;
 548                     my @this_facets_array;
 549                     for my $one_facet (
 550                         sort {
 551                             $facets_counter->{$link_value}
 552                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 553                         } keys %{ $facets_counter->{$link_value} }
 554                       )
 555                     {
 556                         $number_of_facets++;
 557                         if (   ( $number_of_facets < 6 )
 558                             || ( $expanded_facet eq $link_value )
 559                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 560                         {
 561
 562                       # Sanitize the link value ), ( will cause errors with CCL,
 563                             my $facet_link_value = $one_facet;
 564                             $facet_link_value =~ s/(\(|\))/ /g;
 565
 566                             # fix the length that will display in the label,
 567                             my $facet_label_value = $one_facet;
 568                             $facet_label_value =
 569                               substr( $one_facet, 0, 20 ) . "..."
 570                               unless length($facet_label_value) <= 20;
 571
 572                             # if it's a branch, label by the name, not the code,
 573                             if ( $link_value =~ /branch/ ) {
 574                                 $facet_label_value =
 575                                   $branches->{$one_facet}->{'branchname'};
 576                             }
 577
 578                 # but we're down with the whole label being in the link's title.
 579                             my $facet_title_value = $one_facet;
 580
 581                             push @this_facets_array,
 582                               (
 583                                 {
 584                                     facet_count =>
 585                                       $facets_counter->{$link_value}
 586                                       ->{$one_facet},
 587                                     facet_label_value => $facet_label_value,
 588                                     facet_title_value => $facet_title_value,
 589                                     facet_link_value  => $facet_link_value,
 590                                     type_link_value   => $link_value,
 591                                 },
 592                               );
 593                         }
 594                     }
 595
 596                     # handle expanded option
 597                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 598                         $expandable = 1
 599                           if ( ( $number_of_facets > 6 )
 600                             && ( $expanded_facet ne $link_value ) );
 601                     }
 602                     push @facets_loop,
 603                       (
 604                         {
 605                             type_link_value => $link_value,
 606                             type_id         => $link_value . "_id",
 607                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 608                             facets     => \@this_facets_array,
 609                             expandable => $expandable,
 610                             expand     => $link_value,
 611                         }
 612                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 613                 }
 614             }
 615         }
 616     }
 617     return ( undef, $results_hashref, \@facets_loop );
 618 }
 619
 620 sub pazGetRecords {
 621     my (
 622         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 623         $results_per_page, $offset,       $expanded_facet, $branches,
 624         $query_type,       $scan
 625     ) = @_;
 626
 627     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 628     $paz->init();
 629     $paz->search($simple_query);
 630     sleep 1;   # FIXME: WHY?
 631
 632     # do results
 633     my $results_hashref = {};
 634     my $stats = XMLin($paz->stat);
 635     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 636
 637     # for a grouped search result, the number of hits
 638     # is the number of groups returned; 'bib_hits' will have
 639     # the total number of bibs.
 640     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 641     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 642
 643     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 644         my $recid = $hit->{recid}->[0];
 645
 646         my $work_title = $hit->{'md-work-title'}->[0];
 647         my $work_author;
 648         if (exists $hit->{'md-work-author'}) {
 649             $work_author = $hit->{'md-work-author'}->[0];
 650         }
 651         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 652
 653         my $result_group = {};
 654         $result_group->{'group_label'} = $group_label;
 655         $result_group->{'group_merge_key'} = $recid;
 656
 657         my $count = 1;
 658         if (exists $hit->{count}) {
 659             $count = $hit->{count}->[0];
 660         }
 661         $result_group->{'group_count'} = $count;
 662
 663         for (my $i = 0; $i < $count; $i++) {
 664             # FIXME -- may need to worry about diacritics here
 665             my $rec = $paz->record($recid, $i);
 666             push @{ $result_group->{'RECORDS'} }, $rec;
 667         }
 668
 669         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 670     }
 671
 672     # pass through facets
 673     my $termlist_xml = $paz->termlist('author,subject');
 674     my $terms = XMLin($termlist_xml, forcearray => 1);
 675     my @facets_loop = ();
 676     #die Dumper($results);
 677 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 678 #        my @facets = ();
 679 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 680 #            push @facets, {
 681 #                facet_label_value => $facet->{'name'}->[0],
 682 #            };
 683 #        }
 684 #        push @facets_loop, ( {
 685 #            type_label => $list,
 686 #            facets => \@facets,
 687 #        } );
 688 #    }
 689
 690     return ( undef, $results_hashref, \@facets_loop );
 691 }
 692
 693 # STOPWORDS
 694 sub _remove_stopwords {
 695     my ( $operand, $index ) = @_;
 696     my @stopwords_removed;
 697
 698     # phrase and exact-qualified indexes shouldn't have stopwords removed
 699     if ( $index !~ m/phr|ext/ ) {
 700
 701 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 702 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 703 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 704 #       is a stopword, we'd get "çon" and wouldn't find anything...
 705         foreach ( keys %{ C4::Context->stopwords } ) {
 706             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 707             if ( $operand =~
 708                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 709             {
 710                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 711                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 712                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 713                                 $operand =~ s/$1//gi;
 714                 push @stopwords_removed, $_;
 715             }
 716         }
 717     }
 718     return ( $operand, \@stopwords_removed );
 719 }
 720
 721 # TRUNCATION
 722 sub _detect_truncation {
 723     my ( $operand, $index ) = @_;
 724     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 725         @regexpr );
 726     $operand =~ s/^ //g;
 727     my @wordlist = split( /\s/, $operand );
 728     foreach my $word (@wordlist) {
 729         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 730             push @rightlefttruncated, $word;
 731         }
 732         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 733             push @lefttruncated, $word;
 734         }
 735         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 736             push @righttruncated, $word;
 737         }
 738         elsif ( index( $word, "*" ) < 0 ) {
 739             push @nontruncated, $word;
 740         }
 741         else {
 742             push @regexpr, $word;
 743         }
 744     }
 745     return (
 746         \@nontruncated,       \@righttruncated, \@lefttruncated,
 747         \@rightlefttruncated, \@regexpr
 748     );
 749 }
 750
 751 # STEMMING
 752 sub _build_stemmed_operand {
 753     my ($operand) = @_;
 754     my $stemmed_operand;
 755
 756     # If operand contains a digit, it is almost certainly an identifier, and should
 757     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 758     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 759     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 760     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 761     return $operand if $operand =~ /\d/;
 762
 763 # FIXME: the locale should be set based on the user's language and/or search choice
 764     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 765
 766 # FIXME: these should be stored in the db so the librarian can modify the behavior
 767     $stemmer->add_exceptions(
 768         {
 769             'and' => 'and',
 770             'or'  => 'or',
 771             'not' => 'not',
 772         }
 773     );
 774     my @words = split( / /, $operand );
 775     my $stems = $stemmer->stem(@words);
 776     for my $stem (@$stems) {
 777         $stemmed_operand .= "$stem";
 778         $stemmed_operand .= "?"
 779           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 780         $stemmed_operand .= " ";
 781     }
 782     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 783     return $stemmed_operand;
 784 }
 785
 786 # FIELD WEIGHTING
 787 sub _build_weighted_query {
 788
 789 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 790 # pretty well but could work much better if we had a smarter query parser
 791     my ( $operand, $stemmed_operand, $index ) = @_;
 792     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 793     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 794     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 795
 796     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 797
 798     # Keyword, or, no index specified
 799     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 800         $weighted_query .=
 801           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 802         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 803         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 804           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 805           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 806         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 807           if $fuzzy_enabled;    # add fuzzy, word list
 808         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 809           if ( $stemming and $stemmed_operand )
 810           ;                     # add stemming, right truncation
 811         $weighted_query .= " or wrdl,r9=\"$operand\"";
 812
 813         # embedded sorting: 0 a-z; 1 z-a
 814         # $weighted_query .= ") or (sort1,aut=1";
 815     }
 816
 817     # Barcode searches should skip this process
 818     elsif ( $index eq 'bc' ) {
 819         $weighted_query .= "bc=\"$operand\"";
 820     }
 821
 822     # Authority-number searches should skip this process
 823     elsif ( $index eq 'an' ) {
 824         $weighted_query .= "an=\"$operand\"";
 825     }
 826
 827     # If the index already has more than one qualifier, wrap the operand
 828     # in quotes and pass it back (assumption is that the user knows what they
 829     # are doing and won't appreciate us mucking up their query
 830     elsif ( $index =~ ',' ) {
 831         $weighted_query .= " $index=\"$operand\"";
 832     }
 833
 834     #TODO: build better cases based on specific search indexes
 835     else {
 836         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 837           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 838         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 839         $weighted_query .=
 840           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 841     }
 842
 843     $weighted_query .= "))";                       # close rank specification
 844     return $weighted_query;
 845 }
 846
 847 =head2 buildQuery
 848
 849 ( $error, $query,
 850 $simple_query, $query_cgi,
 851 $query_desc, $limit,
 852 $limit_cgi, $limit_desc,
 853 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 854
 855 Build queries and limits in CCL, CGI, Human,
 856 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 857
 858 See verbose embedded documentation.
 859
 860
 861 =cut
 862
 863 sub buildQuery {
 864     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 865
 866     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 867
 868     # dereference
 869     my @operators = @$operators if $operators;
 870     my @indexes   = @$indexes   if $indexes;
 871     my @operands  = @$operands  if $operands;
 872     my @limits    = @$limits    if $limits;
 873     my @sort_by   = @$sort_by   if $sort_by;
 874
 875     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 876     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 877     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 878     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 879     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 880
 881     # no stemming/weight/fuzzy in NoZebra
 882     if ( C4::Context->preference("NoZebra") ) {
 883         $stemming      = 0;
 884         $weight_fields = 0;
 885         $fuzzy_enabled = 0;
 886     }
 887
 888     my $query        = $operands[0];
 889     my $simple_query = $operands[0];
 890
 891     # initialize the variables we're passing back
 892     my $query_cgi;
 893     my $query_desc;
 894     my $query_type;
 895
 896     my $limit;
 897     my $limit_cgi;
 898     my $limit_desc;
 899
 900     my $stopwords_removed;    # flag to determine if stopwords have been removed
 901
 902 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 903 # DIAGNOSTIC ONLY!!
 904     if ( $query =~ /^ccl=/ ) {
 905         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 906     }
 907     if ( $query =~ /^cql=/ ) {
 908         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 909     }
 910     if ( $query =~ /^pqf=/ ) {
 911         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 912     }
 913
 914     # pass nested queries directly
 915     # FIXME: need better handling of some of these variables in this case
 916     if ( $query =~ /(\(|\))/ ) {
 917         return (
 918             undef,              $query, $simple_query, $query_cgi,
 919             $query,             $limit, $limit_cgi,    $limit_desc,
 920             $stopwords_removed, 'ccl'
 921         );
 922     }
 923
 924 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 925 # query operands and indexes and add stemming, truncation, field weighting, etc.
 926 # Once we do so, we'll end up with a value in $query, just like if we had an
 927 # incoming $query from the user
 928     else {
 929         $query = ""
 930           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 931         my $previous_operand
 932           ;    # a flag used to keep track if there was a previous query
 933                # if there was, we can apply the current operator
 934                # for every operand
 935         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 936
 937             # COMBINE OPERANDS, INDEXES AND OPERATORS
 938             if ( $operands[$i] ) {
 939
 940               # A flag to determine whether or not to add the index to the query
 941                 my $indexes_set;
 942
 943 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 944                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 945                     $weight_fields    = 0;
 946                     $stemming         = 0;
 947                     $remove_stopwords = 0;
 948                 }
 949                 my $operand = $operands[$i];
 950                 my $index   = $indexes[$i];
 951
 952                 # Add index-specific attributes
 953                 # Date of Publication
 954                 if ( $index eq 'yr' ) {
 955                     $index .= ",st-numeric";
 956                     $indexes_set++;
 957                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 958                 }
 959
 960                 # Date of Acquisition
 961                 elsif ( $index eq 'acqdate' ) {
 962                     $index .= ",st-date-normalized";
 963                     $indexes_set++;
 964                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 965                 }
 966                 # ISBN,ISSN,Standard Number, don't need special treatment
 967                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 968                     $indexes_set++;
 969                     (
 970                         $stemming,      $auto_truncation,
 971                         $weight_fields, $fuzzy_enabled,
 972                         $remove_stopwords
 973                     ) = ( 0, 0, 0, 0, 0 );
 974
 975                 }
 976                 # Set default structure attribute (word list)
 977                 my $struct_attr;
 978                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 979                     $struct_attr = ",wrdl";
 980                 }
 981
 982                 # Some helpful index variants
 983                 my $index_plus       = $index . $struct_attr . ":" if $index;
 984                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 985                 if ($auto_truncation){
 986 #                                       FIXME Auto Truncation is only valid for LTR languages
 987 #                                       use C4::Output;
 988 #                                       use C4::Languages qw(regex_lang_subtags get_bidi);
 989 #                               $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
 990 #                                   my $current_lang = regex_lang_subtags($lang);
 991 #                                   my $bidi;
 992 #                                   $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
 993                                         $index_plus_comma .= "rtrn:";
 994                                 }
 995
 996                 # Remove Stopwords
 997                 if ($remove_stopwords) {
 998                     ( $operand, $stopwords_removed ) =
 999                       _remove_stopwords( $operand, $index );
1000                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
1001                     warn "REMOVED STOPWORDS: @$stopwords_removed"
1002                       if ( $stopwords_removed && $DEBUG );
1003                 }
1004
1005                 # Detect Truncation
1006                 my $truncated_operand;
1007                 my( $nontruncated, $righttruncated, $lefttruncated,
1008                     $rightlefttruncated, $regexpr
1009                 ) = _detect_truncation( $operand, $index );
1010                 warn
1011 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1012                   if $DEBUG;
1013
1014                 # Apply Truncation
1015                 if (
1016                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1017                     scalar(@$rightlefttruncated) > 0 )
1018                 {
1019
1020                # Don't field weight or add the index to the query, we do it here
1021                     $indexes_set = 1;
1022                     undef $weight_fields;
1023                     my $previous_truncation_operand;
1024                     if ( scalar(@$nontruncated) > 0 ) {
1025                         $truncated_operand .= "$index_plus @$nontruncated ";
1026                         $previous_truncation_operand = 1;
1027                     }
1028                     if ( scalar(@$righttruncated) > 0 ) {
1029                         $truncated_operand .= "and "
1030                           if $previous_truncation_operand;
1031                         $truncated_operand .=
1032                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1033                         $previous_truncation_operand = 1;
1034                     }
1035                     if ( scalar(@$lefttruncated) > 0 ) {
1036                         $truncated_operand .= "and "
1037                           if $previous_truncation_operand;
1038                         $truncated_operand .=
1039                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1040                         $previous_truncation_operand = 1;
1041                     }
1042                     if ( scalar(@$rightlefttruncated) > 0 ) {
1043                         $truncated_operand .= "and "
1044                           if $previous_truncation_operand;
1045                         $truncated_operand .=
1046                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1047                         $previous_truncation_operand = 1;
1048                     }
1049                 }
1050                 $operand = $truncated_operand if $truncated_operand;
1051                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1052
1053                 # Handle Stemming
1054                 my $stemmed_operand;
1055                 $stemmed_operand = _build_stemmed_operand($operand)
1056                   if $stemming;
1057                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1058
1059                 # Handle Field Weighting
1060                 my $weighted_operand;
1061                 $weighted_operand =
1062                   _build_weighted_query( $operand, $stemmed_operand, $index )
1063                   if $weight_fields;
1064                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1065                 $operand = $weighted_operand if $weight_fields;
1066                 $indexes_set = 1 if $weight_fields;
1067
1068                 # If there's a previous operand, we need to add an operator
1069                 if ($previous_operand) {
1070
1071                     # User-specified operator
1072                     if ( $operators[ $i - 1 ] ) {
1073                         $query     .= " $operators[$i-1] ";
1074                         $query     .= " $index_plus " unless $indexes_set;
1075                         $query     .= " $operand";
1076                         $query_cgi .= "&op=$operators[$i-1]";
1077                         $query_cgi .= "&idx=$index" if $index;
1078                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1079                         $query_desc .=
1080                           " $operators[$i-1] $index_plus $operands[$i]";
1081                     }
1082
1083                     # Default operator is and
1084                     else {
1085                         $query      .= " and ";
1086                         $query      .= "$index_plus " unless $indexes_set;
1087                         $query      .= "$operand";
1088                         $query_cgi  .= "&op=and&idx=$index" if $index;
1089                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1090                         $query_desc .= " and $index_plus $operands[$i]";
1091                     }
1092                 }
1093
1094                 # There isn't a pervious operand, don't need an operator
1095                 else {
1096
1097                     # Field-weighted queries already have indexes set
1098                     $query .= " $index_plus " unless $indexes_set;
1099                     $query .= $operand;
1100                     $query_desc .= " $index_plus $operands[$i]";
1101                     $query_cgi  .= "&idx=$index" if $index;
1102                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1103                     $previous_operand = 1;
1104                 }
1105             }    #/if $operands
1106         }    # /for
1107     }
1108     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1109
1110     # add limits
1111     my $group_OR_limits;
1112     my $availability_limit;
1113     foreach my $this_limit (@limits) {
1114         if ( $this_limit =~ /available/ ) {
1115
1116 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1117 # In English:
1118 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1119             $availability_limit .=
1120 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1121             $limit_cgi  .= "&limit=available";
1122             $limit_desc .= "";
1123         }
1124
1125         # group_OR_limits, prefixed by mc-
1126         # OR every member of the group
1127         elsif ( $this_limit =~ /mc/ ) {
1128             $group_OR_limits .= " or " if $group_OR_limits;
1129             $limit_desc      .= " or " if $group_OR_limits;
1130             $group_OR_limits .= "$this_limit";
1131             $limit_cgi       .= "&limit=$this_limit";
1132             $limit_desc      .= " $this_limit";
1133         }
1134
1135         # Regular old limits
1136         else {
1137             $limit .= " and " if $limit || $query;
1138             $limit      .= "$this_limit";
1139             $limit_cgi  .= "&limit=$this_limit";
1140             if ($this_limit =~ /^branch:(.+)/) {
1141                 my $branchcode = $1;
1142                 my $branchname = GetBranchName($branchcode);
1143                 if (defined $branchname) {
1144                     $limit_desc .= " branch:$branchname";
1145                 } else {
1146                     $limit_desc .= " $this_limit";
1147                 }
1148             } else {
1149                 $limit_desc .= " $this_limit";
1150             }
1151         }
1152     }
1153     if ($group_OR_limits) {
1154         $limit .= " and " if ( $query || $limit );
1155         $limit .= "($group_OR_limits)";
1156     }
1157     if ($availability_limit) {
1158         $limit .= " and " if ( $query || $limit );
1159         $limit .= "($availability_limit)";
1160     }
1161
1162     # Normalize the query and limit strings
1163     $query =~ s/:/=/g;
1164     $limit =~ s/:/=/g;
1165     for ( $query, $query_desc, $limit, $limit_desc ) {
1166         $_ =~ s/  / /g;    # remove extra spaces
1167         $_ =~ s/^ //g;     # remove any beginning spaces
1168         $_ =~ s/ $//g;     # remove any ending spaces
1169         $_ =~ s/==/=/g;    # remove double == from query
1170     }
1171     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1172
1173     for ($query_cgi,$simple_query) {
1174         $_ =~ s/"//g;
1175     }
1176     # append the limit to the query
1177     $query .= " " . $limit;
1178
1179     # Warnings if DEBUG
1180     if ($DEBUG) {
1181         warn "QUERY:" . $query;
1182         warn "QUERY CGI:" . $query_cgi;
1183         warn "QUERY DESC:" . $query_desc;
1184         warn "LIMIT:" . $limit;
1185         warn "LIMIT CGI:" . $limit_cgi;
1186         warn "LIMIT DESC:" . $limit_desc;
1187         warn "---------\nLeave buildQuery\n---------";
1188     }
1189     return (
1190         undef,              $query, $simple_query, $query_cgi,
1191         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1192         $stopwords_removed, $query_type
1193     );
1194 }
1195
1196 =head2 searchResults
1197
1198 Format results in a form suitable for passing to the template
1199
1200 =cut
1201
1202 # IMO this subroutine is pretty messy still -- it's responsible for
1203 # building the HTML output for the template
1204 sub searchResults {
1205     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1206     my $dbh = C4::Context->dbh;
1207     my $even = 1;
1208     my @newresults;
1209
1210     # add search-term highlighting via <span>s on the search terms
1211     my $span_terms_hashref;
1212     for my $span_term ( split( / /, $searchdesc ) ) {
1213         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1214         $span_terms_hashref->{$span_term}++;
1215     }
1216
1217     #Build branchnames hash
1218     #find branchname
1219     #get branch information.....
1220     my %branches;
1221     my $bsth =
1222       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1223       ;    # FIXME : use C4::Koha::GetBranches
1224     $bsth->execute();
1225     while ( my $bdata = $bsth->fetchrow_hashref ) {
1226         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1227     }
1228 # FIXME - We build an authorised values hash here, using the default framework
1229 # though it is possible to have different authvals for different fws.
1230
1231     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1232
1233     # get notforloan authorised value list (see $shelflocations  FIXME)
1234     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1235
1236     #Build itemtype hash
1237     #find itemtype & itemtype image
1238     my %itemtypes;
1239     $bsth =
1240       $dbh->prepare(
1241         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1242       );
1243     $bsth->execute();
1244     while ( my $bdata = $bsth->fetchrow_hashref ) {
1245                 foreach (qw(description imageurl summary notforloan)) {
1246                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1247                 }
1248     }
1249
1250     #search item field code
1251     my $sth =
1252       $dbh->prepare(
1253 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1254       );
1255     $sth->execute;
1256     my ($itemtag) = $sth->fetchrow;
1257
1258     ## find column names of items related to MARC
1259     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1260     $sth2->execute;
1261     my %subfieldstosearch;
1262     while ( ( my $column ) = $sth2->fetchrow ) {
1263         my ( $tagfield, $tagsubfield ) =
1264           &GetMarcFromKohaField( "items." . $column, "" );
1265         $subfieldstosearch{$column} = $tagsubfield;
1266     }
1267
1268     # handle which records to actually retrieve
1269     my $times;
1270     if ( $hits && $offset + $results_per_page <= $hits ) {
1271         $times = $offset + $results_per_page;
1272     }
1273     else {
1274         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1275     }
1276
1277         my $marcflavour = C4::Context->preference("marcflavour");
1278     # loop through all of the records we've retrieved
1279     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1280         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1281         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1282         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1283         $oldbiblio->{result_number} = $i + 1;
1284
1285         # add imageurl to itemtype if there is one
1286         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1287
1288         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1289                 $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1290                 $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1291                 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1292                 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1293                 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1294                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1295  # Build summary if there is one (the summary is defined in the itemtypes table)
1296  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1297         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1298             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1299             my @fields  = $marcrecord->fields();
1300             foreach my $field (@fields) {
1301                 my $tag      = $field->tag();
1302                 my $tagvalue = $field->as_string();
1303                 $summary =~
1304                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1305                 unless ( $tag < 10 ) {
1306                     my @subf = $field->subfields;
1307                     for my $i ( 0 .. $#subf ) {
1308                         my $subfieldcode  = $subf[$i][0];
1309                         my $subfieldvalue = $subf[$i][1];
1310                         my $tagsubf       = $tag . $subfieldcode;
1311                         $summary =~
1312 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1313                     }
1314                 }
1315             }
1316             # FIXME: yuk
1317             $summary =~ s/\[(.*?)]//g;
1318             $summary =~ s/\n/<br\/>/g;
1319             $oldbiblio->{summary} = $summary;
1320         }
1321
1322         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1323         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1324         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1325         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1326         # Add search-term highlighting to the whole record where they match using <span>s
1327         if (C4::Context->preference("OpacHighlightedWords")){
1328             my $searchhighlightblob;
1329             for my $highlight_field ( $marcrecord->fields ) {
1330
1331     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1332                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1333                 for my $subfield ($highlight_field->subfields()) {
1334                     my $match;
1335                     next if $subfield->[0] eq '9';
1336                     my $field = $subfield->[1];
1337                     for my $term ( keys %$span_terms_hashref ) {
1338                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1339                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1340                         $match++;
1341                         }
1342                     }
1343                     $searchhighlightblob .= $field . " ... " if $match;
1344                 }
1345
1346             }
1347             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1348             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1349         }
1350
1351         # Add search-term highlighting to the title, subtitle, etc. fields
1352         for my $term ( keys %$span_terms_hashref ) {
1353             my $old_term = $term;
1354             if ( length($term) > 3 ) {
1355                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1356                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1357                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1358                                 }
1359             }
1360         }
1361
1362         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1363
1364         # Pull out the items fields
1365         my @fields = $marcrecord->field($itemtag);
1366
1367         # Setting item statuses for display
1368         my @available_items_loop;
1369         my @onloan_items_loop;
1370         my @other_items_loop;
1371
1372         my $available_items;
1373         my $onloan_items;
1374         my $other_items;
1375
1376         my $ordered_count         = 0;
1377         my $available_count       = 0;
1378         my $onloan_count          = 0;
1379         my $longoverdue_count     = 0;
1380         my $other_count           = 0;
1381         my $wthdrawn_count        = 0;
1382         my $itemlost_count        = 0;
1383         my $itembinding_count     = 0;
1384         my $itemdamaged_count     = 0;
1385         my $item_in_transit_count = 0;
1386         my $can_place_holds       = 0;
1387         my $items_count           = scalar(@fields);
1388         my $maxitems =
1389           ( C4::Context->preference('maxItemsinSearchResults') )
1390           ? C4::Context->preference('maxItemsinSearchResults') - 1
1391           : 1;
1392
1393         # loop through every item
1394         foreach my $field (@fields) {
1395             my $item;
1396
1397             # populate the items hash
1398             foreach my $code ( keys %subfieldstosearch ) {
1399                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1400             }
1401                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1402                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1403             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1404             if ($item->{$hbranch}) {
1405                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1406             }
1407             elsif ($item->{$otherbranch}) {     # Last resort
1408                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1409             }
1410
1411                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1412 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1413             if ( $item->{onloan} ) {
1414                 $onloan_count++;
1415                                 my $key = $prefix . $item->{due_date};
1416                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1417                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1418                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1419                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1420                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1421                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1422                 # if something's checked out and lost, mark it as 'long overdue'
1423                 if ( $item->{itemlost} ) {
1424                     $onloan_items->{$prefix}->{longoverdue}++;
1425                     $longoverdue_count++;
1426                 } else {        # can place holds as long as item isn't lost
1427                     $can_place_holds = 1;
1428                 }
1429             }
1430
1431          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1432             else {
1433
1434                 # item is on order
1435                 if ( $item->{notforloan} == -1 ) {
1436                     $ordered_count++;
1437                 }
1438
1439                 # is item in transit?
1440                 my $transfertwhen = '';
1441                 my ($transfertfrom, $transfertto);
1442
1443                 unless ($item->{wthdrawn}
1444                         || $item->{itemlost}
1445                         || $item->{damaged}
1446                         || $item->{notforloan}
1447                         || $items_count > 20) {
1448
1449                     # A couple heuristics to limit how many times
1450                     # we query the database for item transfer information, sacrificing
1451                     # accuracy in some cases for speed;
1452                     #
1453                     # 1. don't query if item has one of the other statuses
1454                     # 2. don't check transit status if the bib has
1455                     #    more than 20 items
1456                     #
1457                     # FIXME: to avoid having the query the database like this, and to make
1458                     #        the in transit status count as unavailable for search limiting,
1459                     #        should map transit status to record indexed in Zebra.
1460                     #
1461                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1462                 }
1463
1464                 # item is withdrawn, lost or damaged
1465                 if (   $item->{wthdrawn}
1466                     || $item->{itemlost}
1467                     || $item->{damaged}
1468                     || $item->{notforloan}
1469                     || ($transfertwhen ne ''))
1470                 {
1471                     $wthdrawn_count++        if $item->{wthdrawn};
1472                     $itemlost_count++        if $item->{itemlost};
1473                     $itemdamaged_count++     if $item->{damaged};
1474                     $item_in_transit_count++ if $transfertwhen ne '';
1475                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1476                     $other_count++;
1477
1478                                         my $key = $prefix . $item->{status};
1479                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1480                         $other_items->{$key}->{$_} = $item->{$_};
1481                                         }
1482                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1483                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1484                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1485                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1486                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1487                 }
1488                 # item is available
1489                 else {
1490                     $can_place_holds = 1;
1491                     $available_count++;
1492                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1493                                         foreach (qw(branchname itemcallnumber)) {
1494                         $available_items->{$prefix}->{$_} = $item->{$_};
1495                                         }
1496                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1497                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1498                 }
1499             }
1500         }    # notforloan, item level and biblioitem level
1501         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1502         $maxitems =
1503           ( C4::Context->preference('maxItemsinSearchResults') )
1504           ? C4::Context->preference('maxItemsinSearchResults') - 1
1505           : 1;
1506         for my $key ( sort keys %$onloan_items ) {
1507             (++$onloanitemscount > $maxitems) and last;
1508             push @onloan_items_loop, $onloan_items->{$key};
1509         }
1510         for my $key ( sort keys %$other_items ) {
1511             (++$otheritemscount > $maxitems) and last;
1512             push @other_items_loop, $other_items->{$key};
1513         }
1514         for my $key ( sort keys %$available_items ) {
1515             (++$availableitemscount > $maxitems) and last;
1516             push @available_items_loop, $available_items->{$key}
1517         }
1518
1519         # XSLT processing of some stuff
1520         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1521             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1522             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1523         }
1524
1525         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1526         $can_place_holds = 0
1527           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1528         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1529         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1530         $oldbiblio->{items_count}          = $items_count;
1531         $oldbiblio->{available_items_loop} = \@available_items_loop;
1532         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1533         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1534         $oldbiblio->{availablecount}       = $available_count;
1535         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1536         $oldbiblio->{onloancount}          = $onloan_count;
1537         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1538         $oldbiblio->{othercount}           = $other_count;
1539         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1540         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1541         $oldbiblio->{itemlostcount}        = $itemlost_count;
1542         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1543         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1544         $oldbiblio->{orderedcount}         = $ordered_count;
1545         push( @newresults, $oldbiblio );
1546     }
1547     return @newresults;
1548 }
1549
1550 #----------------------------------------------------------------------
1551 #
1552 # Non-Zebra GetRecords#
1553 #----------------------------------------------------------------------
1554
1555 =head2 NZgetRecords
1556
1557   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1558
1559 =cut
1560
1561 sub NZgetRecords {
1562     my (
1563         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1564         $results_per_page, $offset,       $expanded_facet, $branches,
1565         $query_type,       $scan
1566     ) = @_;
1567     warn "query =$query" if $DEBUG;
1568     my $result = NZanalyse($query);
1569     warn "results =$result" if $DEBUG;
1570     return ( undef,
1571         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1572         undef );
1573 }
1574
1575 =head2 NZanalyse
1576
1577   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1578   the list is built from an inverted index in the nozebra SQL table
1579   note that title is here only for convenience : the sorting will be very fast when requested on title
1580   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1581
1582 =cut
1583
1584 sub NZanalyse {
1585     my ( $string, $server ) = @_;
1586 #     warn "---------"       if $DEBUG;
1587     warn " NZanalyse" if $DEBUG;
1588 #     warn "---------"       if $DEBUG;
1589
1590  # $server contains biblioserver or authorities, depending on what we search on.
1591  #warn "querying : $string on $server";
1592     $server = 'biblioserver' unless $server;
1593
1594 # if we have a ", replace the content to discard temporarily any and/or/not inside
1595     my $commacontent;
1596     if ( $string =~ /"/ ) {
1597         $string =~ s/"(.*?)"/__X__/;
1598         $commacontent = $1;
1599         warn "commacontent : $commacontent" if $DEBUG;
1600     }
1601
1602 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1603 # then, call again NZanalyse with $left and $right
1604 # (recursive until we find a leaf (=> something without and/or/not)
1605 # delete repeated operator... Would then go in infinite loop
1606     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1607     }
1608
1609     #process parenthesis before.
1610     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1611         my $left     = $1;
1612         my $right    = $4;
1613         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1614         warn
1615 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1616           if $DEBUG;
1617         my $leftresult = NZanalyse( $left, $server );
1618         if ($operator) {
1619             my $rightresult = NZanalyse( $right, $server );
1620
1621             # OK, we have the results for right and left part of the query
1622             # depending of operand, intersect, union or exclude both lists
1623             # to get a result list
1624             if ( $operator eq ' and ' ) {
1625                 return NZoperatorAND($leftresult,$rightresult);
1626             }
1627             elsif ( $operator eq ' or ' ) {
1628
1629                 # just merge the 2 strings
1630                 return $leftresult . $rightresult;
1631             }
1632             elsif ( $operator eq ' not ' ) {
1633                 return NZoperatorNOT($leftresult,$rightresult);
1634             }
1635         }
1636         else {
1637 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1638             return $leftresult;
1639         }
1640     }
1641     warn "string :" . $string if $DEBUG;
1642     my $left = "";
1643     my $right = "";
1644     my $operator = "";
1645     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1646         $left     = $1;
1647         $right    = $3;
1648         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1649     }
1650     warn "no parenthesis. left : $left operator: $operator right: $right"
1651       if $DEBUG;
1652
1653     # it's not a leaf, we have a and/or/not
1654     if ($operator) {
1655
1656         # reintroduce comma content if needed
1657         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1658         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1659         warn "node : $left / $operator / $right\n" if $DEBUG;
1660         my $leftresult  = NZanalyse( $left,  $server );
1661         my $rightresult = NZanalyse( $right, $server );
1662         warn " leftresult : $leftresult" if $DEBUG;
1663         warn " rightresult : $rightresult" if $DEBUG;
1664         # OK, we have the results for right and left part of the query
1665         # depending of operand, intersect, union or exclude both lists
1666         # to get a result list
1667         if ( $operator eq ' and ' ) {
1668             warn "NZAND";
1669             return NZoperatorAND($leftresult,$rightresult);
1670         }
1671         elsif ( $operator eq ' or ' ) {
1672
1673             # just merge the 2 strings
1674             return $leftresult . $rightresult;
1675         }
1676         elsif ( $operator eq ' not ' ) {
1677             return NZoperatorNOT($leftresult,$rightresult);
1678         }
1679         else {
1680
1681 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1682             die "error : operand unknown : $operator for $string";
1683         }
1684
1685         # it's a leaf, do the real SQL query and return the result
1686     }
1687     else {
1688         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1689         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1690         #remove trailing blank at the beginning
1691         $string =~ s/^ //g;
1692         warn "leaf:$string" if $DEBUG;
1693
1694         # parse the string in in operator/operand/value again
1695         my $left = "";
1696         my $operator = "";
1697         my $right = "";
1698         if ($string =~ /(.*)(>=|<=)(.*)/) {
1699             $left     = $1;
1700             $operator = $2;
1701             $right    = $3;
1702         } else {
1703             $left = $string;
1704         }
1705 #         warn "handling leaf... left:$left operator:$operator right:$right"
1706 #           if $DEBUG;
1707         unless ($operator) {
1708             if ($string =~ /(.*)(>|<|=)(.*)/) {
1709                 $left     = $1;
1710                 $operator = $2;
1711                 $right    = $3;
1712                 warn
1713     "handling unless (operator)... left:$left operator:$operator right:$right"
1714                 if $DEBUG;
1715             } else {
1716                 $left = $string;
1717             }
1718         }
1719         my $results;
1720
1721 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1722         $left =~ s/ .*$//;
1723
1724         # automatic replace for short operators
1725         $left = 'title'            if $left =~ '^ti$';
1726         $left = 'author'           if $left =~ '^au$';
1727         $left = 'publisher'        if $left =~ '^pb$';
1728         $left = 'subject'          if $left =~ '^su$';
1729         $left = 'koha-Auth-Number' if $left =~ '^an$';
1730         $left = 'keyword'          if $left =~ '^kw$';
1731         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1732         if ( $operator && $left ne 'keyword' ) {
1733
1734             #do a specific search
1735             my $dbh = C4::Context->dbh;
1736             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1737             my $sth =
1738               $dbh->prepare(
1739 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1740               );
1741             warn "$left / $operator / $right\n" if $DEBUG;
1742
1743             # split each word, query the DB and build the biblionumbers result
1744             #sanitizing leftpart
1745             $left =~ s/^\s+|\s+$//;
1746             foreach ( split / /, $right ) {
1747                 my $biblionumbers;
1748                 $_ =~ s/^\s+|\s+$//;
1749                 next unless $_;
1750                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1751                 $sth->execute( $server, $left, $_ )
1752                   or warn "execute failed: $!";
1753                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1754
1755 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1756 # otherwise, fill the result
1757                     $biblionumbers .= $line
1758                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1759                     warn "result : $value "
1760                       . ( $right  =~ /\d/ ) . "=="
1761                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1762                 }
1763
1764 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1765                 if ($results) {
1766                     warn "NZAND" if $DEBUG;
1767                     $results = NZoperatorAND($biblionumbers,$results);
1768                 }
1769                 else {
1770                     $results = $biblionumbers;
1771                 }
1772             }
1773         }
1774         else {
1775
1776       #do a complete search (all indexes), if index='kw' do complete search too.
1777             my $dbh = C4::Context->dbh;
1778             my $sth =
1779               $dbh->prepare(
1780 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1781               );
1782
1783             # split each word, query the DB and build the biblionumbers result
1784             foreach ( split / /, $string ) {
1785                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1786                 warn "search on all indexes on $_" if $DEBUG;
1787                 my $biblionumbers;
1788                 next unless $_;
1789                 $sth->execute( $server, $_ );
1790                 while ( my $line = $sth->fetchrow ) {
1791                     $biblionumbers .= $line;
1792                 }
1793
1794 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1795                 if ($results) {
1796                     $results = NZoperatorAND($biblionumbers,$results);
1797                 }
1798                 else {
1799                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1800                     $results = $biblionumbers;
1801                 }
1802             }
1803         }
1804         warn "return : $results for LEAF : $string" if $DEBUG;
1805         return $results;
1806     }
1807     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1808 }
1809
1810 sub NZoperatorAND{
1811     my ($rightresult, $leftresult)=@_;
1812
1813     my @leftresult = split /;/, $leftresult;
1814     warn " @leftresult / $rightresult \n" if $DEBUG;
1815
1816     #             my @rightresult = split /;/,$leftresult;
1817     my $finalresult;
1818
1819 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1820 # the result is stored twice, to have the same weight for AND than OR.
1821 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1822 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1823     foreach (@leftresult) {
1824         my $value = $_;
1825         my $countvalue;
1826         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1827         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1828             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1829             $finalresult .=
1830                 "$value-$countvalue;$value-$countvalue;";
1831         }
1832     }
1833     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1834     return $finalresult;
1835 }
1836
1837 sub NZoperatorOR{
1838     my ($rightresult, $leftresult)=@_;
1839     return $rightresult.$leftresult;
1840 }
1841
1842 sub NZoperatorNOT{
1843     my ($leftresult, $rightresult)=@_;
1844
1845     my @leftresult = split /;/, $leftresult;
1846
1847     #             my @rightresult = split /;/,$leftresult;
1848     my $finalresult;
1849     foreach (@leftresult) {
1850         my $value=$_;
1851         $value=$1 if $value=~m/(.*)-\d+$/;
1852         unless ($rightresult =~ "$value-") {
1853             $finalresult .= "$_;";
1854         }
1855     }
1856     return $finalresult;
1857 }
1858
1859 =head2 NZorder
1860
1861   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1862
1863   TODO :: Description
1864
1865 =cut
1866
1867 sub NZorder {
1868     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1869     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1870
1871     # order title asc by default
1872     #     $ordering = '1=36 <i' unless $ordering;
1873     $results_per_page = 20 unless $results_per_page;
1874     $offset           = 0  unless $offset;
1875     my $dbh = C4::Context->dbh;
1876
1877     #
1878     # order by POPULARITY
1879     #
1880     if ( $ordering =~ /popularity/ ) {
1881         my %result;
1882         my %popularity;
1883
1884         # popularity is not in MARC record, it's builded from a specific query
1885         my $sth =
1886           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1887         foreach ( split /;/, $biblionumbers ) {
1888             my ( $biblionumber, $title ) = split /,/, $_;
1889             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1890             $sth->execute($biblionumber);
1891             my $popularity = $sth->fetchrow || 0;
1892
1893 # hint : the key is popularity.title because we can have
1894 # many results with the same popularity. In this cas, sub-ordering is done by title
1895 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1896 # (un-frequent, I agree, but we won't forget anything that way ;-)
1897             $popularity{ sprintf( "%10d", $popularity ) . $title
1898                   . $biblionumber } = $biblionumber;
1899         }
1900
1901     # sort the hash and return the same structure as GetRecords (Zebra querying)
1902         my $result_hash;
1903         my $numbers = 0;
1904         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1905             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1906                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1907                   $result{ $popularity{$key} }->as_usmarc();
1908             }
1909         }
1910         else {                                    # sort popularity ASC
1911             foreach my $key ( sort ( keys %popularity ) ) {
1912                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1913                   $result{ $popularity{$key} }->as_usmarc();
1914             }
1915         }
1916         my $finalresult = ();
1917         $result_hash->{'hits'}         = $numbers;
1918         $finalresult->{'biblioserver'} = $result_hash;
1919         return $finalresult;
1920
1921         #
1922         # ORDER BY author
1923         #
1924     }
1925     elsif ( $ordering =~ /author/ ) {
1926         my %result;
1927         foreach ( split /;/, $biblionumbers ) {
1928             my ( $biblionumber, $title ) = split /,/, $_;
1929             my $record = GetMarcBiblio($biblionumber);
1930             my $author;
1931             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1932                 $author = $record->subfield( '200', 'f' );
1933                 $author = $record->subfield( '700', 'a' ) unless $author;
1934             }
1935             else {
1936                 $author = $record->subfield( '100', 'a' );
1937             }
1938
1939 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1940 # and we don't want to get only 1 result for each of them !!!
1941             $result{ $author . $biblionumber } = $record;
1942         }
1943
1944     # sort the hash and return the same structure as GetRecords (Zebra querying)
1945         my $result_hash;
1946         my $numbers = 0;
1947         if ( $ordering eq 'author_za' ) {    # sort by author desc
1948             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1949                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1950                   $result{$key}->as_usmarc();
1951             }
1952         }
1953         else {                               # sort by author ASC
1954             foreach my $key ( sort ( keys %result ) ) {
1955                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1956                   $result{$key}->as_usmarc();
1957             }
1958         }
1959         my $finalresult = ();
1960         $result_hash->{'hits'}         = $numbers;
1961         $finalresult->{'biblioserver'} = $result_hash;
1962         return $finalresult;
1963
1964         #
1965         # ORDER BY callnumber
1966         #
1967     }
1968     elsif ( $ordering =~ /callnumber/ ) {
1969         my %result;
1970         foreach ( split /;/, $biblionumbers ) {
1971             my ( $biblionumber, $title ) = split /,/, $_;
1972             my $record = GetMarcBiblio($biblionumber);
1973             my $callnumber;
1974             my $frameworkcode = GetFrameworkCode($biblionumber);
1975             my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField(  'items.itemcallnumber', $frameworkcode);
1976                ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
1977                 unless $callnumber_tag;
1978             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1979                 $callnumber = $record->subfield( '200', 'f' );
1980             } else {
1981                 $callnumber = $record->subfield( '100', 'a' );
1982             }
1983
1984 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1985 # and we don't want to get only 1 result for each of them !!!
1986             $result{ $callnumber . $biblionumber } = $record;
1987         }
1988
1989     # sort the hash and return the same structure as GetRecords (Zebra querying)
1990         my $result_hash;
1991         my $numbers = 0;
1992         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1993             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1994                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1995                   $result{$key}->as_usmarc();
1996             }
1997         }
1998         else {                                     # sort by title ASC
1999             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
2000                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2001                   $result{$key}->as_usmarc();
2002             }
2003         }
2004         my $finalresult = ();
2005         $result_hash->{'hits'}         = $numbers;
2006         $finalresult->{'biblioserver'} = $result_hash;
2007         return $finalresult;
2008     }
2009     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2010         my %result;
2011         foreach ( split /;/, $biblionumbers ) {
2012             my ( $biblionumber, $title ) = split /,/, $_;
2013             my $record = GetMarcBiblio($biblionumber);
2014             my ( $publicationyear_tag, $publicationyear_subfield ) =
2015               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2016             my $publicationyear =
2017               $record->subfield( $publicationyear_tag,
2018                 $publicationyear_subfield );
2019
2020 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2021 # and we don't want to get only 1 result for each of them !!!
2022             $result{ $publicationyear . $biblionumber } = $record;
2023         }
2024
2025     # sort the hash and return the same structure as GetRecords (Zebra querying)
2026         my $result_hash;
2027         my $numbers = 0;
2028         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2029             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2030                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2031                   $result{$key}->as_usmarc();
2032             }
2033         }
2034         else {                                 # sort by pub year ASC
2035             foreach my $key ( sort ( keys %result ) ) {
2036                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2037                   $result{$key}->as_usmarc();
2038             }
2039         }
2040         my $finalresult = ();
2041         $result_hash->{'hits'}         = $numbers;
2042         $finalresult->{'biblioserver'} = $result_hash;
2043         return $finalresult;
2044
2045         #
2046         # ORDER BY title
2047         #
2048     }
2049     elsif ( $ordering =~ /title/ ) {
2050
2051 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2052         my %result;
2053         foreach ( split /;/, $biblionumbers ) {
2054             my ( $biblionumber, $title ) = split /,/, $_;
2055
2056 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2057 # and we don't want to get only 1 result for each of them !!!
2058 # hint & speed improvement : we can order without reading the record
2059 # so order, and read records only for the requested page !
2060             $result{ $title . $biblionumber } = $biblionumber;
2061         }
2062
2063     # sort the hash and return the same structure as GetRecords (Zebra querying)
2064         my $result_hash;
2065         my $numbers = 0;
2066         if ( $ordering eq 'title_az' ) {    # sort by title desc
2067             foreach my $key ( sort ( keys %result ) ) {
2068                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2069             }
2070         }
2071         else {                              # sort by title ASC
2072             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2073                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2074             }
2075         }
2076
2077         # limit the $results_per_page to result size if it's more
2078         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2079
2080         # for the requested page, replace biblionumber by the complete record
2081         # speed improvement : avoid reading too much things
2082         for (
2083             my $counter = $offset ;
2084             $counter <= $offset + $results_per_page ;
2085             $counter++
2086           )
2087         {
2088             $result_hash->{'RECORDS'}[$counter] =
2089               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2090         }
2091         my $finalresult = ();
2092         $result_hash->{'hits'}         = $numbers;
2093         $finalresult->{'biblioserver'} = $result_hash;
2094         return $finalresult;
2095     }
2096     else {
2097
2098 #
2099 # order by ranking
2100 #
2101 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2102         my %result;
2103         my %count_ranking;
2104         foreach ( split /;/, $biblionumbers ) {
2105             my ( $biblionumber, $title ) = split /,/, $_;
2106             $title =~ /(.*)-(\d)/;
2107
2108             # get weight
2109             my $ranking = $2;
2110
2111 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2112 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2113 # biblio N has ranking = 6
2114             $count_ranking{$biblionumber} += $ranking;
2115         }
2116
2117 # build the result by "inverting" the count_ranking hash
2118 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2119 #         warn "counting";
2120         foreach ( keys %count_ranking ) {
2121             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2122         }
2123
2124     # sort the hash and return the same structure as GetRecords (Zebra querying)
2125         my $result_hash;
2126         my $numbers = 0;
2127         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2128             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2129         }
2130
2131         # limit the $results_per_page to result size if it's more
2132         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2133
2134         # for the requested page, replace biblionumber by the complete record
2135         # speed improvement : avoid reading too much things
2136         for (
2137             my $counter = $offset ;
2138             $counter <= $offset + $results_per_page ;
2139             $counter++
2140           )
2141         {
2142             $result_hash->{'RECORDS'}[$counter] =
2143               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2144               if $result_hash->{'RECORDS'}[$counter];
2145         }
2146         my $finalresult = ();
2147         $result_hash->{'hits'}         = $numbers;
2148         $finalresult->{'biblioserver'} = $result_hash;
2149         return $finalresult;
2150     }
2151 }
2152
2153 END { }    # module clean-up code here (global destructor)
2154
2155 1;
2156 __END__
2157
2158 =head1 AUTHOR
2159
2160 Koha Developement team <info@koha.org>
2161
2162 =cut