C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30
  31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  32
  33 # set the version for version checking
  34 BEGIN {
  35     $VERSION = 3.01;
  36     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Search - Functions for searching the Koha catalog.
  42
  43 =head1 SYNOPSIS
  44
  45 See opac/opac-search.pl or catalogue/search.pl for example of usage
  46
  47 =head1 DESCRIPTION
  48
  49 This module provides searching functions for Koha's bibliographic databases
  50
  51 =head1 FUNCTIONS
  52
  53 =cut
  54
  55 @ISA    = qw(Exporter);
  56 @EXPORT = qw(
  57   &findseealso
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 findseealso($dbh,$fields);
  69
  70 C<$dbh> is a link to the DB handler.
  71
  72 use C4::Context;
  73 my $dbh =C4::Context->dbh;
  74
  75 C<$fields> is a reference to the fields array
  76
  77 This function modifies the @$fields array and adds related fields to search on.
  78
  79 FIXME: this function is probably deprecated in Koha 3
  80
  81 =cut
  82
  83 sub findseealso {
  84     my ( $dbh, $fields ) = @_;
  85     my $tagslib = GetMarcStructure(1);
  86     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  87         my ($tag)      = substr( @$fields[$i], 1, 3 );
  88         my ($subfield) = substr( @$fields[$i], 4, 1 );
  89         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  90           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  91     }
  92 }
  93
  94 =head2 FindDuplicate
  95
  96 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  97
  98 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  99
 100 =cut
 101
 102 sub FindDuplicate {
 103     my ($record) = @_;
 104     my $dbh = C4::Context->dbh;
 105     my $result = TransformMarcToKoha( $dbh, $record, '' );
 106     my $sth;
 107     my $query;
 108     my $search;
 109     my $type;
 110     my ( $biblionumber, $title );
 111
 112     # search duplicate on ISBN, easy and fast..
 113     # ... normalize first
 114     if ( $result->{isbn} ) {
 115         $result->{isbn} =~ s/\(.*$//;
 116         $result->{isbn} =~ s/\s+$//;
 117         $query = "isbn=$result->{isbn}";
 118     }
 119     else {
 120         $result->{title} =~ s /\\//g;
 121         $result->{title} =~ s /\"//g;
 122         $result->{title} =~ s /\(//g;
 123         $result->{title} =~ s /\)//g;
 124
 125         # FIXME: instead of removing operators, could just do
 126         # quotes around the value
 127         $result->{title} =~ s/(and|or|not)//g;
 128         $query = "ti,ext=$result->{title}";
 129         $query .= " and itemtype=$result->{itemtype}"
 130           if ( $result->{itemtype} );
 131         if   ( $result->{author} ) {
 132             $result->{author} =~ s /\\//g;
 133             $result->{author} =~ s /\"//g;
 134             $result->{author} =~ s /\(//g;
 135             $result->{author} =~ s /\)//g;
 136
 137             # remove valid operators
 138             $result->{author} =~ s/(and|or|not)//g;
 139             $query .= " and au,ext=$result->{author}";
 140         }
 141     }
 142
 143     # FIXME: add error handling
 144     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 145     my @results;
 146     foreach my $possible_duplicate_record (@$searchresults) {
 147         my $marcrecord =
 148           MARC::Record->new_from_usmarc($possible_duplicate_record);
 149         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 150
 151         # FIXME :: why 2 $biblionumber ?
 152         if ($result) {
 153             push @results, $result->{'biblionumber'};
 154             push @results, $result->{'title'};
 155         }
 156     }
 157     return @results;
 158 }
 159
 160 =head2 SimpleSearch
 161
 162 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 163
 164 This function provides a simple search API on the bibliographic catalog
 165
 166 =over 2
 167
 168 =item C<input arg:>
 169
 170     * $query can be a simple keyword or a complete CCL query
 171     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 172     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 173     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 174
 175
 176 =item C<Output:>
 177
 178     * $error is a empty unless an error is detected
 179     * \@results is an array of records.
 180     * $total_hits is the number of hits that would have been returned with no limit
 181
 182 =item C<usage in the script:>
 183
 184 =back
 185
 186 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 187
 188 if (defined $error) {
 189     $template->param(query_error => $error);
 190     warn "error: ".$error;
 191     output_html_with_http_headers $input, $cookie, $template->output;
 192     exit;
 193 }
 194
 195 my $hits = scalar @$marcresults;
 196 my @results;
 197
 198 for my $i (0..$hits) {
 199     my %resultsloop;
 200     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 201     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 202
 203     #build the hash for the template.
 204     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 205     $resultsloop{title}           = $biblio->{'title'};
 206     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 207     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 208     $resultsloop{author}          = $biblio->{'author'};
 209     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 210     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 211
 212     push @results, \%resultsloop;
 213 }
 214
 215 $template->param(result=>\@results);
 216
 217 =cut
 218
 219 sub SimpleSearch {
 220     my ( $query, $offset, $max_results, $servers )  = @_;
 221
 222     if ( C4::Context->preference('NoZebra') ) {
 223         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 224         my $search_result =
 225           (      $result->{hits}
 226               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 227         return ( undef, $search_result, scalar($search_result) );
 228     }
 229     else {
 230         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 231         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 232         my @results;
 233         my @zoom_queries;
 234         my @tmpresults;
 235         my @zconns;
 236         my $total_hits;
 237         return ( "No query entered", undef, undef ) unless $query;
 238
 239         # Initialize & Search Zebra
 240         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 241             eval {
 242                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 243                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 244                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 245
 246                 # error handling
 247                 my $error =
 248                     $zconns[$i]->errmsg() . " ("
 249                   . $zconns[$i]->errcode() . ") "
 250                   . $zconns[$i]->addinfo() . " "
 251                   . $zconns[$i]->diagset();
 252
 253                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 254             };
 255             if ($@) {
 256
 257                 # caught a ZOOM::Exception
 258                 my $error =
 259                     $@->message() . " ("
 260                   . $@->code() . ") "
 261                   . $@->addinfo() . " "
 262                   . $@->diagset();
 263                 warn $error;
 264                 return ( $error, undef, undef );
 265             }
 266         }
 267         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 268             my $event = $zconns[ $i - 1 ]->last_event();
 269             if ( $event == ZOOM::Event::ZEND ) {
 270
 271                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 272                 my $hits = $tmpresults[ $i - 1 ]->size();
 273                 $total_hits += $hits;
 274                 my $last_record = $hits;
 275                 if ( defined $max_results && $offset + $max_results < $hits ) {
 276                     $last_record  = $offset + $max_results;
 277                 }
 278
 279                 for my $j ( $first_record..$last_record ) {
 280                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 281                     push @results, $record;
 282                 }
 283             }
 284         }
 285
 286         foreach my $result (@tmpresults) {
 287             $result->destroy();
 288         }
 289         foreach my $zoom_query (@zoom_queries) {
 290             $zoom_query->destroy();
 291         }
 292
 293         return ( undef, \@results, $total_hits );
 294     }
 295 }
 296
 297 =head2 getRecords
 298
 299 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 300
 301         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 302         $results_per_page, $offset,       $expanded_facet, $branches,
 303         $query_type,       $scan
 304     );
 305
 306 The all singing, all dancing, multi-server, asynchronous, scanning,
 307 searching, record nabbing, facet-building
 308
 309 See verbse embedded documentation.
 310
 311 =cut
 312
 313 sub getRecords {
 314     my (
 315         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 316         $results_per_page, $offset,       $expanded_facet, $branches,
 317         $query_type,       $scan
 318     ) = @_;
 319
 320     my @servers = @$servers_ref;
 321     my @sort_by = @$sort_by_ref;
 322
 323     # Initialize variables for the ZOOM connection and results object
 324     my $zconn;
 325     my @zconns;
 326     my @results;
 327     my $results_hashref = ();
 328
 329     # Initialize variables for the faceted results objects
 330     my $facets_counter = ();
 331     my $facets_info    = ();
 332     my $facets         = getFacets();
 333
 334     my @facets_loop
 335       ;    # stores the ref to array of hashes for template facets loop
 336
 337     ### LOOP THROUGH THE SERVERS
 338     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 339         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 340
 341 # perform the search, create the results objects
 342 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 343         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 344
 345         #$query_to_use = $simple_query if $scan;
 346         warn $simple_query if ( $scan and $DEBUG );
 347
 348         # Check if we've got a query_type defined, if so, use it
 349         eval {
 350             if ($query_type)
 351             {
 352                 if ( $query_type =~ /^ccl/ ) {
 353                     $query_to_use =~
 354                       s/\:/\=/g;    # change : to = last minute (FIXME)
 355                     $results[$i] =
 356                       $zconns[$i]->search(
 357                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 358                       );
 359                 }
 360                 elsif ( $query_type =~ /^cql/ ) {
 361                     $results[$i] =
 362                       $zconns[$i]->search(
 363                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 364                 }
 365                 elsif ( $query_type =~ /^pqf/ ) {
 366                     $results[$i] =
 367                       $zconns[$i]->search(
 368                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 369                 }
 370             }
 371             else {
 372                 if ($scan) {
 373                     $results[$i] =
 374                       $zconns[$i]->scan(
 375                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 376                       );
 377                 }
 378                 else {
 379                     $results[$i] =
 380                       $zconns[$i]->search(
 381                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 382                       );
 383                 }
 384             }
 385         };
 386         if ($@) {
 387             warn "WARNING: query problem with $query_to_use " . $@;
 388         }
 389
 390         # Concatenate the sort_by limits and pass them to the results object
 391         # Note: sort will override rank
 392         my $sort_by;
 393         foreach my $sort (@sort_by) {
 394             if ( $sort eq "author_az" ) {
 395                 $sort_by .= "1=1003 <i ";
 396             }
 397             elsif ( $sort eq "author_za" ) {
 398                 $sort_by .= "1=1003 >i ";
 399             }
 400             elsif ( $sort eq "popularity_asc" ) {
 401                 $sort_by .= "1=9003 <i ";
 402             }
 403             elsif ( $sort eq "popularity_dsc" ) {
 404                 $sort_by .= "1=9003 >i ";
 405             }
 406             elsif ( $sort eq "call_number_asc" ) {
 407                 $sort_by .= "1=20  <i ";
 408             }
 409             elsif ( $sort eq "call_number_dsc" ) {
 410                 $sort_by .= "1=20 >i ";
 411             }
 412             elsif ( $sort eq "pubdate_asc" ) {
 413                 $sort_by .= "1=31 <i ";
 414             }
 415             elsif ( $sort eq "pubdate_dsc" ) {
 416                 $sort_by .= "1=31 >i ";
 417             }
 418             elsif ( $sort eq "acqdate_asc" ) {
 419                 $sort_by .= "1=32 <i ";
 420             }
 421             elsif ( $sort eq "acqdate_dsc" ) {
 422                 $sort_by .= "1=32 >i ";
 423             }
 424             elsif ( $sort eq "title_az" ) {
 425                 $sort_by .= "1=4 <i ";
 426             }
 427             elsif ( $sort eq "title_za" ) {
 428                 $sort_by .= "1=4 >i ";
 429             }
 430         }
 431         if ($sort_by) {
 432             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 433                 warn "WARNING sort $sort_by failed";
 434             }
 435         }
 436     }    # finished looping through servers
 437
 438     # The big moment: asynchronously retrieve results from all servers
 439     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 440         my $ev = $zconns[ $i - 1 ]->last_event();
 441         if ( $ev == ZOOM::Event::ZEND ) {
 442             next unless $results[ $i - 1 ];
 443             my $size = $results[ $i - 1 ]->size();
 444             if ( $size > 0 ) {
 445                 my $results_hash;
 446
 447                 # loop through the results
 448                 $results_hash->{'hits'} = $size;
 449                 my $times;
 450                 if ( $offset + $results_per_page <= $size ) {
 451                     $times = $offset + $results_per_page;
 452                 }
 453                 else {
 454                     $times = $size;
 455                 }
 456                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 457                     my $records_hash;
 458                     my $record;
 459                     my $facet_record;
 460
 461                     ## Check if it's an index scan
 462                     if ($scan) {
 463                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 464
 465                  # here we create a minimal MARC record and hand it off to the
 466                  # template just like a normal result ... perhaps not ideal, but
 467                  # it works for now
 468                         my $tmprecord = MARC::Record->new();
 469                         $tmprecord->encoding('UTF-8');
 470                         my $tmptitle;
 471                         my $tmpauthor;
 472
 473                 # the minimal record in author/title (depending on MARC flavour)
 474                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 475                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 476                             $tmprecord->append_fields($tmptitle);
 477                         } else {
 478                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 479                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 480                             $tmprecord->append_fields($tmptitle);
 481                             $tmprecord->append_fields($tmpauthor);
 482                         }
 483                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 484                     }
 485
 486                     # not an index scan
 487                     else {
 488                         $record = $results[ $i - 1 ]->record($j)->raw();
 489
 490                         # warn "RECORD $j:".$record;
 491                         $results_hash->{'RECORDS'}[$j] = $record;
 492
 493             # Fill the facets while we're looping, but only for the biblioserver
 494                         $facet_record = MARC::Record->new_from_usmarc($record)
 495                           if $servers[ $i - 1 ] =~ /biblioserver/;
 496
 497                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 498                         if ($facet_record) {
 499                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 500
 501                                 if ( $facets->[$k] ) {
 502                                     my @fields;
 503                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 504                                     {
 505                                         push @fields,
 506                                           $facet_record->field($tag);
 507                                     }
 508                                     for my $field (@fields) {
 509                                         my @subfields = $field->subfields();
 510                                         for my $subfield (@subfields) {
 511                                             my ( $code, $data ) = @$subfield;
 512                                             if ( $code eq
 513                                                 $facets->[$k]->{'subfield'} )
 514                                             {
 515                                                 $facets_counter->{ $facets->[$k]
 516                                                       ->{'link_value'} }
 517                                                   ->{$data}++;
 518                                             }
 519                                         }
 520                                     }
 521                                     $facets_info->{ $facets->[$k]
 522                                           ->{'link_value'} }->{'label_value'} =
 523                                       $facets->[$k]->{'label_value'};
 524                                     $facets_info->{ $facets->[$k]
 525                                           ->{'link_value'} }->{'expanded'} =
 526                                       $facets->[$k]->{'expanded'};
 527                                 }
 528                             }
 529                         }
 530                     }
 531                 }
 532                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 533             }
 534
 535             # warn "connection ", $i-1, ": $size hits";
 536             # warn $results[$i-1]->record(0)->render() if $size > 0;
 537
 538             # BUILD FACETS
 539             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 540                 for my $link_value (
 541                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 542                     keys %$facets_counter )
 543                 {
 544                     my $expandable;
 545                     my $number_of_facets;
 546                     my @this_facets_array;
 547                     for my $one_facet (
 548                         sort {
 549                             $facets_counter->{$link_value}
 550                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 551                         } keys %{ $facets_counter->{$link_value} }
 552                       )
 553                     {
 554                         $number_of_facets++;
 555                         if (   ( $number_of_facets < 6 )
 556                             || ( $expanded_facet eq $link_value )
 557                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 558                         {
 559
 560                       # Sanitize the link value ), ( will cause errors with CCL,
 561                             my $facet_link_value = $one_facet;
 562                             $facet_link_value =~ s/(\(|\))/ /g;
 563
 564                             # fix the length that will display in the label,
 565                             my $facet_label_value = $one_facet;
 566                             $facet_label_value =
 567                               substr( $one_facet, 0, 20 ) . "..."
 568                               unless length($facet_label_value) <= 20;
 569
 570                             # if it's a branch, label by the name, not the code,
 571                             if ( $link_value =~ /branch/ ) {
 572                                 $facet_label_value =
 573                                   $branches->{$one_facet}->{'branchname'};
 574                             }
 575
 576                 # but we're down with the whole label being in the link's title.
 577                             my $facet_title_value = $one_facet;
 578
 579                             push @this_facets_array,
 580                               (
 581                                 {
 582                                     facet_count =>
 583                                       $facets_counter->{$link_value}
 584                                       ->{$one_facet},
 585                                     facet_label_value => $facet_label_value,
 586                                     facet_title_value => $facet_title_value,
 587                                     facet_link_value  => $facet_link_value,
 588                                     type_link_value   => $link_value,
 589                                 },
 590                               );
 591                         }
 592                     }
 593
 594                     # handle expanded option
 595                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 596                         $expandable = 1
 597                           if ( ( $number_of_facets > 6 )
 598                             && ( $expanded_facet ne $link_value ) );
 599                     }
 600                     push @facets_loop,
 601                       (
 602                         {
 603                             type_link_value => $link_value,
 604                             type_id         => $link_value . "_id",
 605                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 606                             facets     => \@this_facets_array,
 607                             expandable => $expandable,
 608                             expand     => $link_value,
 609                         }
 610                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 611                 }
 612             }
 613         }
 614     }
 615     return ( undef, $results_hashref, \@facets_loop );
 616 }
 617
 618 sub pazGetRecords {
 619     my (
 620         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 621         $results_per_page, $offset,       $expanded_facet, $branches,
 622         $query_type,       $scan
 623     ) = @_;
 624
 625     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 626     $paz->init();
 627     $paz->search($simple_query);
 628     sleep 1;
 629
 630     # do results
 631     my $results_hashref = {};
 632     my $stats = XMLin($paz->stat);
 633     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 634
 635     # for a grouped search result, the number of hits
 636     # is the number of groups returned; 'bib_hits' will have
 637     # the total number of bibs.
 638     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 639     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 640
 641     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 642         my $recid = $hit->{recid}->[0];
 643
 644         my $work_title = $hit->{'md-work-title'}->[0];
 645         my $work_author;
 646         if (exists $hit->{'md-work-author'}) {
 647             $work_author = $hit->{'md-work-author'}->[0];
 648         }
 649         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 650
 651         my $result_group = {};
 652         $result_group->{'group_label'} = $group_label;
 653         $result_group->{'group_merge_key'} = $recid;
 654
 655         my $count = 1;
 656         if (exists $hit->{count}) {
 657             $count = $hit->{count}->[0];
 658         }
 659         $result_group->{'group_count'} = $count;
 660
 661         for (my $i = 0; $i < $count; $i++) {
 662             # FIXME -- may need to worry about diacritics here
 663             my $rec = $paz->record($recid, $i);
 664             push @{ $result_group->{'RECORDS'} }, $rec;
 665         }
 666
 667         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 668     }
 669
 670     # pass through facets
 671     my $termlist_xml = $paz->termlist('author,subject');
 672     my $terms = XMLin($termlist_xml, forcearray => 1);
 673     my @facets_loop = ();
 674     #die Dumper($results);
 675 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 676 #        my @facets = ();
 677 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 678 #            push @facets, {
 679 #                facet_label_value => $facet->{'name'}->[0],
 680 #            };
 681 #        }
 682 #        push @facets_loop, ( {
 683 #            type_label => $list,
 684 #            facets => \@facets,
 685 #        } );
 686 #    }
 687
 688     return ( undef, $results_hashref, \@facets_loop );
 689 }
 690
 691 # STOPWORDS
 692 sub _remove_stopwords {
 693     my ( $operand, $index ) = @_;
 694     my @stopwords_removed;
 695
 696     # phrase and exact-qualified indexes shouldn't have stopwords removed
 697     if ( $index !~ m/phr|ext/ ) {
 698
 699 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 700 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 701 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 702 #       is a stopword, we'd get "çon" and wouldn't find anything...
 703         foreach ( keys %{ C4::Context->stopwords } ) {
 704             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 705             if ( $operand =~
 706                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 707             {
 708                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 709                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 710                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 711                                 $operand =~ s/$1//gi;
 712                 push @stopwords_removed, $_;
 713             }
 714         }
 715     }
 716     return ( $operand, \@stopwords_removed );
 717 }
 718
 719 # TRUNCATION
 720 sub _detect_truncation {
 721     my ( $operand, $index ) = @_;
 722     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 723         @regexpr );
 724     $operand =~ s/^ //g;
 725     my @wordlist = split( /\s/, $operand );
 726     foreach my $word (@wordlist) {
 727         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 728             push @rightlefttruncated, $word;
 729         }
 730         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 731             push @lefttruncated, $word;
 732         }
 733         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 734             push @righttruncated, $word;
 735         }
 736         elsif ( index( $word, "*" ) < 0 ) {
 737             push @nontruncated, $word;
 738         }
 739         else {
 740             push @regexpr, $word;
 741         }
 742     }
 743     return (
 744         \@nontruncated,       \@righttruncated, \@lefttruncated,
 745         \@rightlefttruncated, \@regexpr
 746     );
 747 }
 748
 749 # STEMMING
 750 sub _build_stemmed_operand {
 751     my ($operand) = @_;
 752     my $stemmed_operand;
 753
 754     # If operand contains a digit, it is almost certainly an identifier, and should
 755     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 756     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 757     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 758     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 759     return $operand if $operand =~ /\d/;
 760
 761 # FIXME: the locale should be set based on the user's language and/or search choice
 762     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 763
 764 # FIXME: these should be stored in the db so the librarian can modify the behavior
 765     $stemmer->add_exceptions(
 766         {
 767             'and' => 'and',
 768             'or'  => 'or',
 769             'not' => 'not',
 770         }
 771     );
 772     my @words = split( / /, $operand );
 773     my $stems = $stemmer->stem(@words);
 774     for my $stem (@$stems) {
 775         $stemmed_operand .= "$stem";
 776         $stemmed_operand .= "?"
 777           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 778         $stemmed_operand .= " ";
 779     }
 780     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 781     return $stemmed_operand;
 782 }
 783
 784 # FIELD WEIGHTING
 785 sub _build_weighted_query {
 786
 787 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 788 # pretty well but could work much better if we had a smarter query parser
 789     my ( $operand, $stemmed_operand, $index ) = @_;
 790     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 791     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 792     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 793
 794     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 795
 796     # Keyword, or, no index specified
 797     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 798         $weighted_query .=
 799           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 800         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 801         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 802           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 803           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 804         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 805           if $fuzzy_enabled;    # add fuzzy, word list
 806         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 807           if ( $stemming and $stemmed_operand )
 808           ;                     # add stemming, right truncation
 809         $weighted_query .= " or wrdl,r9=\"$operand\"";
 810
 811         # embedded sorting: 0 a-z; 1 z-a
 812         # $weighted_query .= ") or (sort1,aut=1";
 813     }
 814
 815     # Barcode searches should skip this process
 816     elsif ( $index eq 'bc' ) {
 817         $weighted_query .= "bc=\"$operand\"";
 818     }
 819
 820     # Authority-number searches should skip this process
 821     elsif ( $index eq 'an' ) {
 822         $weighted_query .= "an=\"$operand\"";
 823     }
 824
 825     # If the index already has more than one qualifier, wrap the operand
 826     # in quotes and pass it back (assumption is that the user knows what they
 827     # are doing and won't appreciate us mucking up their query
 828     elsif ( $index =~ ',' ) {
 829         $weighted_query .= " $index=\"$operand\"";
 830     }
 831
 832     #TODO: build better cases based on specific search indexes
 833     else {
 834         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 835           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 836         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 837         $weighted_query .=
 838           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 839     }
 840
 841     $weighted_query .= "))";                       # close rank specification
 842     return $weighted_query;
 843 }
 844
 845 =head2 buildQuery
 846
 847 ( $error, $query,
 848 $simple_query, $query_cgi,
 849 $query_desc, $limit,
 850 $limit_cgi, $limit_desc,
 851 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 852
 853 Build queries and limits in CCL, CGI, Human,
 854 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 855
 856 See verbose embedded documentation.
 857
 858
 859 =cut
 860
 861 sub buildQuery {
 862     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 863
 864     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 865
 866     # dereference
 867     my @operators = @$operators if $operators;
 868     my @indexes   = @$indexes   if $indexes;
 869     my @operands  = @$operands  if $operands;
 870     my @limits    = @$limits    if $limits;
 871     my @sort_by   = @$sort_by   if $sort_by;
 872
 873     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 874     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 875     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 876     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 877     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 878
 879     # no stemming/weight/fuzzy in NoZebra
 880     if ( C4::Context->preference("NoZebra") ) {
 881         $stemming      = 0;
 882         $weight_fields = 0;
 883         $fuzzy_enabled = 0;
 884     }
 885
 886     my $query        = $operands[0];
 887     my $simple_query = $operands[0];
 888
 889     # initialize the variables we're passing back
 890     my $query_cgi;
 891     my $query_desc;
 892     my $query_type;
 893
 894     my $limit;
 895     my $limit_cgi;
 896     my $limit_desc;
 897
 898     my $stopwords_removed;    # flag to determine if stopwords have been removed
 899
 900 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 901 # DIAGNOSTIC ONLY!!
 902     if ( $query =~ /^ccl=/ ) {
 903         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 904     }
 905     if ( $query =~ /^cql=/ ) {
 906         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 907     }
 908     if ( $query =~ /^pqf=/ ) {
 909         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 910     }
 911
 912     # pass nested queries directly
 913     # FIXME: need better handling of some of these variables in this case
 914     if ( $query =~ /(\(|\))/ ) {
 915         return (
 916             undef,              $query, $simple_query, $query_cgi,
 917             $query,             $limit, $limit_cgi,    $limit_desc,
 918             $stopwords_removed, 'ccl'
 919         );
 920     }
 921
 922 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 923 # query operands and indexes and add stemming, truncation, field weighting, etc.
 924 # Once we do so, we'll end up with a value in $query, just like if we had an
 925 # incoming $query from the user
 926     else {
 927         $query = ""
 928           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 929         my $previous_operand
 930           ;    # a flag used to keep track if there was a previous query
 931                # if there was, we can apply the current operator
 932                # for every operand
 933         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 934
 935             # COMBINE OPERANDS, INDEXES AND OPERATORS
 936             if ( $operands[$i] ) {
 937
 938               # A flag to determine whether or not to add the index to the query
 939                 my $indexes_set;
 940
 941 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 942                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 943                     $weight_fields    = 0;
 944                     $stemming         = 0;
 945                     $remove_stopwords = 0;
 946                 }
 947                 my $operand = $operands[$i];
 948                 my $index   = $indexes[$i];
 949
 950                 # Add index-specific attributes
 951                 # Date of Publication
 952                 if ( $index eq 'yr' ) {
 953                     $index .= ",st-numeric";
 954                     $indexes_set++;
 955                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 956                 }
 957
 958                 # Date of Acquisition
 959                 elsif ( $index eq 'acqdate' ) {
 960                     $index .= ",st-date-normalized";
 961                     $indexes_set++;
 962                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 963                 }
 964                 # ISBN,ISSN,Standard Number, don't need special treatment
 965                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 966                     $indexes_set++;
 967                     (
 968                         $stemming,      $auto_truncation,
 969                         $weight_fields, $fuzzy_enabled,
 970                         $remove_stopwords
 971                     ) = ( 0, 0, 0, 0, 0 );
 972
 973                 }
 974                 # Set default structure attribute (word list)
 975                 my $struct_attr;
 976                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 977                     $struct_attr = ",wrdl";
 978                 }
 979
 980                 # Some helpful index variants
 981                 my $index_plus       = $index . $struct_attr . ":" if $index;
 982                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 983
 984                 # Remove Stopwords
 985                 if ($remove_stopwords) {
 986                     ( $operand, $stopwords_removed ) =
 987                       _remove_stopwords( $operand, $index );
 988                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 989                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 990                       if ( $stopwords_removed && $DEBUG );
 991                 }
 992
 993                 # Detect Truncation
 994                 my $truncated_operand;
 995                 my( $nontruncated, $righttruncated, $lefttruncated,
 996                     $rightlefttruncated, $regexpr
 997                 ) = _detect_truncation( $operand, $index );
 998                 warn
 999 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1000                   if $DEBUG;
1001
1002                 # Apply Truncation
1003                 if (
1004                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1005                     scalar(@$rightlefttruncated) > 0 )
1006                 {
1007
1008                # Don't field weight or add the index to the query, we do it here
1009                     $indexes_set = 1;
1010                     undef $weight_fields;
1011                     my $previous_truncation_operand;
1012                     if ( scalar(@$nontruncated) > 0 ) {
1013                         $truncated_operand .= "$index_plus @$nontruncated ";
1014                         $previous_truncation_operand = 1;
1015                     }
1016                     if ( scalar(@$righttruncated) > 0 ) {
1017                         $truncated_operand .= "and "
1018                           if $previous_truncation_operand;
1019                         $truncated_operand .=
1020                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1021                         $previous_truncation_operand = 1;
1022                     }
1023                     if ( scalar(@$lefttruncated) > 0 ) {
1024                         $truncated_operand .= "and "
1025                           if $previous_truncation_operand;
1026                         $truncated_operand .=
1027                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1028                         $previous_truncation_operand = 1;
1029                     }
1030                     if ( scalar(@$rightlefttruncated) > 0 ) {
1031                         $truncated_operand .= "and "
1032                           if $previous_truncation_operand;
1033                         $truncated_operand .=
1034                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1035                         $previous_truncation_operand = 1;
1036                     }
1037                 }
1038                 $operand = $truncated_operand if $truncated_operand;
1039                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1040
1041                 # Handle Stemming
1042                 my $stemmed_operand;
1043                 $stemmed_operand = _build_stemmed_operand($operand)
1044                   if $stemming;
1045                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1046
1047                 # Handle Field Weighting
1048                 my $weighted_operand;
1049                 $weighted_operand =
1050                   _build_weighted_query( $operand, $stemmed_operand, $index )
1051                   if $weight_fields;
1052                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1053                 $operand = $weighted_operand if $weight_fields;
1054                 $indexes_set = 1 if $weight_fields;
1055
1056                 # If there's a previous operand, we need to add an operator
1057                 if ($previous_operand) {
1058
1059                     # User-specified operator
1060                     if ( $operators[ $i - 1 ] ) {
1061                         $query     .= " $operators[$i-1] ";
1062                         $query     .= " $index_plus " unless $indexes_set;
1063                         $query     .= " $operand";
1064                         $query_cgi .= "&op=$operators[$i-1]";
1065                         $query_cgi .= "&idx=$index" if $index;
1066                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1067                         $query_desc .=
1068                           " $operators[$i-1] $index_plus $operands[$i]";
1069                     }
1070
1071                     # Default operator is and
1072                     else {
1073                         $query      .= " and ";
1074                         $query      .= "$index_plus " unless $indexes_set;
1075                         $query      .= "$operand";
1076                         $query_cgi  .= "&op=and&idx=$index" if $index;
1077                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1078                         $query_desc .= " and $index_plus $operands[$i]";
1079                     }
1080                 }
1081
1082                 # There isn't a pervious operand, don't need an operator
1083                 else {
1084
1085                     # Field-weighted queries already have indexes set
1086                     $query .= " $index_plus " unless $indexes_set;
1087                     $query .= $operand;
1088                     $query_desc .= " $index_plus $operands[$i]";
1089                     $query_cgi  .= "&idx=$index" if $index;
1090                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1091                     $previous_operand = 1;
1092                 }
1093             }    #/if $operands
1094         }    # /for
1095     }
1096     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1097
1098     # add limits
1099     my $group_OR_limits;
1100     my $availability_limit;
1101     foreach my $this_limit (@limits) {
1102         if ( $this_limit =~ /available/ ) {
1103
1104 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1105 # In English:
1106 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1107             $availability_limit .=
1108 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1109             $limit_cgi  .= "&limit=available";
1110             $limit_desc .= "";
1111         }
1112
1113         # group_OR_limits, prefixed by mc-
1114         # OR every member of the group
1115         elsif ( $this_limit =~ /mc/ ) {
1116             $group_OR_limits .= " or " if $group_OR_limits;
1117             $limit_desc      .= " or " if $group_OR_limits;
1118             $group_OR_limits .= "$this_limit";
1119             $limit_cgi       .= "&limit=$this_limit";
1120             $limit_desc      .= " $this_limit";
1121         }
1122
1123         # Regular old limits
1124         else {
1125             $limit .= " and " if $limit || $query;
1126             $limit      .= "$this_limit";
1127             $limit_cgi  .= "&limit=$this_limit";
1128             if ($this_limit =~ /^branch:(.+)/) {
1129                 my $branchcode = $1;
1130                 my $branchname = GetBranchName($branchcode);
1131                 if (defined $branchname) {
1132                     $limit_desc .= " branch:$branchname";
1133                 } else {
1134                     $limit_desc .= " $this_limit";
1135                 }
1136             } else {
1137                 $limit_desc .= " $this_limit";
1138             }
1139         }
1140     }
1141     if ($group_OR_limits) {
1142         $limit .= " and " if ( $query || $limit );
1143         $limit .= "($group_OR_limits)";
1144     }
1145     if ($availability_limit) {
1146         $limit .= " and " if ( $query || $limit );
1147         $limit .= "($availability_limit)";
1148     }
1149
1150     # Normalize the query and limit strings
1151     $query =~ s/:/=/g;
1152     $limit =~ s/:/=/g;
1153     for ( $query, $query_desc, $limit, $limit_desc ) {
1154         $_ =~ s/  / /g;    # remove extra spaces
1155         $_ =~ s/^ //g;     # remove any beginning spaces
1156         $_ =~ s/ $//g;     # remove any ending spaces
1157         $_ =~ s/==/=/g;    # remove double == from query
1158     }
1159     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1160
1161     for ($query_cgi,$simple_query) {
1162         $_ =~ s/"//g;
1163     }
1164     # append the limit to the query
1165     $query .= " " . $limit;
1166
1167     # Warnings if DEBUG
1168     if ($DEBUG) {
1169         warn "QUERY:" . $query;
1170         warn "QUERY CGI:" . $query_cgi;
1171         warn "QUERY DESC:" . $query_desc;
1172         warn "LIMIT:" . $limit;
1173         warn "LIMIT CGI:" . $limit_cgi;
1174         warn "LIMIT DESC:" . $limit_desc;
1175         warn "---------\nLeave buildQuery\n---------";
1176     }
1177     return (
1178         undef,              $query, $simple_query, $query_cgi,
1179         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1180         $stopwords_removed, $query_type
1181     );
1182 }
1183
1184 =head2 searchResults
1185
1186 Format results in a form suitable for passing to the template
1187
1188 =cut
1189
1190 # IMO this subroutine is pretty messy still -- it's responsible for
1191 # building the HTML output for the template
1192 sub searchResults {
1193     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1194     my $dbh = C4::Context->dbh;
1195     my $even = 1;
1196     my @newresults;
1197
1198     # add search-term highlighting via <span>s on the search terms
1199     my $span_terms_hashref;
1200     for my $span_term ( split( / /, $searchdesc ) ) {
1201         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1202         $span_terms_hashref->{$span_term}++;
1203     }
1204
1205     #Build branchnames hash
1206     #find branchname
1207     #get branch information.....
1208     my %branches;
1209     my $bsth =
1210       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1211       ;    # FIXME : use C4::Koha::GetBranches
1212     $bsth->execute();
1213     while ( my $bdata = $bsth->fetchrow_hashref ) {
1214         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1215     }
1216 # FIXME - We build an authorised values hash here, using the default framework
1217 # though it is possible to have different authvals for different fws.
1218
1219     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1220
1221     # get notforloan authorised value list (see $shelflocations  FIXME)
1222     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1223
1224     #Build itemtype hash
1225     #find itemtype & itemtype image
1226     my %itemtypes;
1227     $bsth =
1228       $dbh->prepare(
1229         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1230       );
1231     $bsth->execute();
1232     while ( my $bdata = $bsth->fetchrow_hashref ) {
1233                 foreach (qw(description imageurl summary notforloan)) {
1234                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1235                 }
1236     }
1237
1238     #search item field code
1239     my $sth =
1240       $dbh->prepare(
1241 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1242       );
1243     $sth->execute;
1244     my ($itemtag) = $sth->fetchrow;
1245
1246     ## find column names of items related to MARC
1247     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1248     $sth2->execute;
1249     my %subfieldstosearch;
1250     while ( ( my $column ) = $sth2->fetchrow ) {
1251         my ( $tagfield, $tagsubfield ) =
1252           &GetMarcFromKohaField( "items." . $column, "" );
1253         $subfieldstosearch{$column} = $tagsubfield;
1254     }
1255
1256     # handle which records to actually retrieve
1257     my $times;
1258     if ( $hits && $offset + $results_per_page <= $hits ) {
1259         $times = $offset + $results_per_page;
1260     }
1261     else {
1262         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1263     }
1264
1265         my $marcflavour = C4::Context->preference("marcflavour");
1266     # loop through all of the records we've retrieved
1267     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1268         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1269         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1270         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1271         $oldbiblio->{result_number} = $i + 1;
1272
1273         # add imageurl to itemtype if there is one
1274         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1275
1276         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1277                 $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1278                 $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1279                 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1280                 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1281                 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1282                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1283  # Build summary if there is one (the summary is defined in the itemtypes table)
1284  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1285         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1286             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1287             my @fields  = $marcrecord->fields();
1288             foreach my $field (@fields) {
1289                 my $tag      = $field->tag();
1290                 my $tagvalue = $field->as_string();
1291                 $summary =~
1292                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1293                 unless ( $tag < 10 ) {
1294                     my @subf = $field->subfields;
1295                     for my $i ( 0 .. $#subf ) {
1296                         my $subfieldcode  = $subf[$i][0];
1297                         my $subfieldvalue = $subf[$i][1];
1298                         my $tagsubf       = $tag . $subfieldcode;
1299                         $summary =~
1300 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1301                     }
1302                 }
1303             }
1304             # FIXME: yuk
1305             $summary =~ s/\[(.*?)]//g;
1306             $summary =~ s/\n/<br\/>/g;
1307             $oldbiblio->{summary} = $summary;
1308         }
1309
1310         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1311         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1312         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1313         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1314         # Add search-term highlighting to the whole record where they match using <span>s
1315         if (C4::Context->preference("OpacHighlightedWords")){
1316             my $searchhighlightblob;
1317             for my $highlight_field ( $marcrecord->fields ) {
1318
1319     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1320                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1321                 for my $subfield ($highlight_field->subfields()) {
1322                     my $match;
1323                     next if $subfield->[0] eq '9';
1324                     my $field = $subfield->[1];
1325                     for my $term ( keys %$span_terms_hashref ) {
1326                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1327                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1328                         $match++;
1329                         }
1330                     }
1331                     $searchhighlightblob .= $field . " ... " if $match;
1332                 }
1333
1334             }
1335             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1336             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1337         }
1338
1339         # Add search-term highlighting to the title, subtitle, etc. fields
1340         for my $term ( keys %$span_terms_hashref ) {
1341             my $old_term = $term;
1342             if ( length($term) > 3 ) {
1343                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1344                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1345                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1346                                 }
1347             }
1348         }
1349
1350         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1351
1352         # Pull out the items fields
1353         my @fields = $marcrecord->field($itemtag);
1354
1355         # Setting item statuses for display
1356         my @available_items_loop;
1357         my @onloan_items_loop;
1358         my @other_items_loop;
1359
1360         my $available_items;
1361         my $onloan_items;
1362         my $other_items;
1363
1364         my $ordered_count         = 0;
1365         my $available_count       = 0;
1366         my $onloan_count          = 0;
1367         my $longoverdue_count     = 0;
1368         my $other_count           = 0;
1369         my $wthdrawn_count        = 0;
1370         my $itemlost_count        = 0;
1371         my $itembinding_count     = 0;
1372         my $itemdamaged_count     = 0;
1373         my $item_in_transit_count = 0;
1374         my $can_place_holds       = 0;
1375         my $items_count           = scalar(@fields);
1376         my $maxitems =
1377           ( C4::Context->preference('maxItemsinSearchResults') )
1378           ? C4::Context->preference('maxItemsinSearchResults') - 1
1379           : 1;
1380
1381         # loop through every item
1382         foreach my $field (@fields) {
1383             my $item;
1384
1385             # populate the items hash
1386             foreach my $code ( keys %subfieldstosearch ) {
1387                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1388             }
1389                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1390                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1391             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1392             if ($item->{$hbranch}) {
1393                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1394             }
1395             elsif ($item->{$otherbranch}) {     # Last resort
1396                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1397             }
1398
1399                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1400 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1401             if ( $item->{onloan} ) {
1402                 $onloan_count++;
1403                                 my $key = $prefix . $item->{due_date};
1404                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1405                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1406                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1407                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1408                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1409                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1410                 # if something's checked out and lost, mark it as 'long overdue'
1411                 if ( $item->{itemlost} ) {
1412                     $onloan_items->{$prefix}->{longoverdue}++;
1413                     $longoverdue_count++;
1414                 } else {        # can place holds as long as item isn't lost
1415                     $can_place_holds = 1;
1416                 }
1417             }
1418
1419          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1420             else {
1421
1422                 # item is on order
1423                 if ( $item->{notforloan} == -1 ) {
1424                     $ordered_count++;
1425                 }
1426
1427                 # is item in transit?
1428                 my $transfertwhen = '';
1429                 my ($transfertfrom, $transfertto);
1430
1431                 unless ($item->{wthdrawn}
1432                         || $item->{itemlost}
1433                         || $item->{damaged}
1434                         || $item->{notforloan}
1435                         || $items_count > 20) {
1436
1437                     # A couple heuristics to limit how many times
1438                     # we query the database for item transfer information, sacrificing
1439                     # accuracy in some cases for speed;
1440                     #
1441                     # 1. don't query if item has one of the other statuses
1442                     # 2. don't check transit status if the bib has
1443                     #    more than 20 items
1444                     #
1445                     # FIXME: to avoid having the query the database like this, and to make
1446                     #        the in transit status count as unavailable for search limiting,
1447                     #        should map transit status to record indexed in Zebra.
1448                     #
1449                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1450                 }
1451
1452                 # item is withdrawn, lost or damaged
1453                 if (   $item->{wthdrawn}
1454                     || $item->{itemlost}
1455                     || $item->{damaged}
1456                     || $item->{notforloan}
1457                     || ($transfertwhen ne ''))
1458                 {
1459                     $wthdrawn_count++        if $item->{wthdrawn};
1460                     $itemlost_count++        if $item->{itemlost};
1461                     $itemdamaged_count++     if $item->{damaged};
1462                     $item_in_transit_count++ if $transfertwhen ne '';
1463                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1464                     $other_count++;
1465
1466                                         my $key = $prefix . $item->{status};
1467                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1468                         $other_items->{$key}->{$_} = $item->{$_};
1469                                         }
1470                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1471                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1472                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1473                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1474                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1475                 }
1476                 # item is available
1477                 else {
1478                     $can_place_holds = 1;
1479                     $available_count++;
1480                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1481                                         foreach (qw(branchname itemcallnumber)) {
1482                         $available_items->{$prefix}->{$_} = $item->{$_};
1483                                         }
1484                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1485                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1486                 }
1487             }
1488         }    # notforloan, item level and biblioitem level
1489         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1490         $maxitems =
1491           ( C4::Context->preference('maxItemsinSearchResults') )
1492           ? C4::Context->preference('maxItemsinSearchResults') - 1
1493           : 1;
1494         for my $key ( sort keys %$onloan_items ) {
1495             (++$onloanitemscount > $maxitems) and last;
1496             push @onloan_items_loop, $onloan_items->{$key};
1497         }
1498         for my $key ( sort keys %$other_items ) {
1499             (++$otheritemscount > $maxitems) and last;
1500             push @other_items_loop, $other_items->{$key};
1501         }
1502         for my $key ( sort keys %$available_items ) {
1503             (++$availableitemscount > $maxitems) and last;
1504             push @available_items_loop, $available_items->{$key}
1505         }
1506
1507         # XSLT processing of some stuff
1508         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1509             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1510             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1511         }
1512
1513         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1514         $can_place_holds = 0
1515           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1516         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1517         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1518         $oldbiblio->{items_count}          = $items_count;
1519         $oldbiblio->{available_items_loop} = \@available_items_loop;
1520         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1521         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1522         $oldbiblio->{availablecount}       = $available_count;
1523         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1524         $oldbiblio->{onloancount}          = $onloan_count;
1525         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1526         $oldbiblio->{othercount}           = $other_count;
1527         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1528         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1529         $oldbiblio->{itemlostcount}        = $itemlost_count;
1530         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1531         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1532         $oldbiblio->{orderedcount}         = $ordered_count;
1533         push( @newresults, $oldbiblio );
1534     }
1535     return @newresults;
1536 }
1537
1538 #----------------------------------------------------------------------
1539 #
1540 # Non-Zebra GetRecords#
1541 #----------------------------------------------------------------------
1542
1543 =head2 NZgetRecords
1544
1545   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1546
1547 =cut
1548
1549 sub NZgetRecords {
1550     my (
1551         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1552         $results_per_page, $offset,       $expanded_facet, $branches,
1553         $query_type,       $scan
1554     ) = @_;
1555     warn "query =$query" if $DEBUG;
1556     my $result = NZanalyse($query);
1557     warn "results =$result" if $DEBUG;
1558     return ( undef,
1559         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1560         undef );
1561 }
1562
1563 =head2 NZanalyse
1564
1565   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1566   the list is built from an inverted index in the nozebra SQL table
1567   note that title is here only for convenience : the sorting will be very fast when requested on title
1568   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1569
1570 =cut
1571
1572 sub NZanalyse {
1573     my ( $string, $server ) = @_;
1574 #     warn "---------"       if $DEBUG;
1575     warn " NZanalyse" if $DEBUG;
1576 #     warn "---------"       if $DEBUG;
1577
1578  # $server contains biblioserver or authorities, depending on what we search on.
1579  #warn "querying : $string on $server";
1580     $server = 'biblioserver' unless $server;
1581
1582 # if we have a ", replace the content to discard temporarily any and/or/not inside
1583     my $commacontent;
1584     if ( $string =~ /"/ ) {
1585         $string =~ s/"(.*?)"/__X__/;
1586         $commacontent = $1;
1587         warn "commacontent : $commacontent" if $DEBUG;
1588     }
1589
1590 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1591 # then, call again NZanalyse with $left and $right
1592 # (recursive until we find a leaf (=> something without and/or/not)
1593 # delete repeated operator... Would then go in infinite loop
1594     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1595     }
1596
1597     #process parenthesis before.
1598     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1599         my $left     = $1;
1600         my $right    = $4;
1601         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1602         warn
1603 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1604           if $DEBUG;
1605         my $leftresult = NZanalyse( $left, $server );
1606         if ($operator) {
1607             my $rightresult = NZanalyse( $right, $server );
1608
1609             # OK, we have the results for right and left part of the query
1610             # depending of operand, intersect, union or exclude both lists
1611             # to get a result list
1612             if ( $operator eq ' and ' ) {
1613                 return NZoperatorAND($leftresult,$rightresult);
1614             }
1615             elsif ( $operator eq ' or ' ) {
1616
1617                 # just merge the 2 strings
1618                 return $leftresult . $rightresult;
1619             }
1620             elsif ( $operator eq ' not ' ) {
1621                 return NZoperatorNOT($leftresult,$rightresult);
1622             }
1623         }
1624         else {
1625 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1626             return $leftresult;
1627         }
1628     }
1629     warn "string :" . $string if $DEBUG;
1630     my $left = "";
1631     my $right = "";
1632     my $operator = "";
1633     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1634         $left     = $1;
1635         $right    = $3;
1636         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1637     }
1638     warn "no parenthesis. left : $left operator: $operator right: $right"
1639       if $DEBUG;
1640
1641     # it's not a leaf, we have a and/or/not
1642     if ($operator) {
1643
1644         # reintroduce comma content if needed
1645         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1646         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1647         warn "node : $left / $operator / $right\n" if $DEBUG;
1648         my $leftresult  = NZanalyse( $left,  $server );
1649         my $rightresult = NZanalyse( $right, $server );
1650         warn " leftresult : $leftresult" if $DEBUG;
1651         warn " rightresult : $rightresult" if $DEBUG;
1652         # OK, we have the results for right and left part of the query
1653         # depending of operand, intersect, union or exclude both lists
1654         # to get a result list
1655         if ( $operator eq ' and ' ) {
1656             warn "NZAND";
1657             return NZoperatorAND($leftresult,$rightresult);
1658         }
1659         elsif ( $operator eq ' or ' ) {
1660
1661             # just merge the 2 strings
1662             return $leftresult . $rightresult;
1663         }
1664         elsif ( $operator eq ' not ' ) {
1665             return NZoperatorNOT($leftresult,$rightresult);
1666         }
1667         else {
1668
1669 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1670             die "error : operand unknown : $operator for $string";
1671         }
1672
1673         # it's a leaf, do the real SQL query and return the result
1674     }
1675     else {
1676         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1677         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1678         #remove trailing blank at the beginning
1679         $string =~ s/^ //g;
1680         warn "leaf:$string" if $DEBUG;
1681
1682         # parse the string in in operator/operand/value again
1683         my $left = "";
1684         my $operator = "";
1685         my $right = "";
1686         if ($string =~ /(.*)(>=|<=)(.*)/) {
1687             $left     = $1;
1688             $operator = $2;
1689             $right    = $3;
1690         } else {
1691             $left = $string;
1692         }
1693 #         warn "handling leaf... left:$left operator:$operator right:$right"
1694 #           if $DEBUG;
1695         unless ($operator) {
1696             if ($string =~ /(.*)(>|<|=)(.*)/) {
1697                 $left     = $1;
1698                 $operator = $2;
1699                 $right    = $3;
1700                 warn
1701     "handling unless (operator)... left:$left operator:$operator right:$right"
1702                 if $DEBUG;
1703             } else {
1704                 $left = $string;
1705             }
1706         }
1707         my $results;
1708
1709 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1710         $left =~ s/ .*$//;
1711
1712         # automatic replace for short operators
1713         $left = 'title'            if $left =~ '^ti$';
1714         $left = 'author'           if $left =~ '^au$';
1715         $left = 'publisher'        if $left =~ '^pb$';
1716         $left = 'subject'          if $left =~ '^su$';
1717         $left = 'koha-Auth-Number' if $left =~ '^an$';
1718         $left = 'keyword'          if $left =~ '^kw$';
1719         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1720         if ( $operator && $left ne 'keyword' ) {
1721
1722             #do a specific search
1723             my $dbh = C4::Context->dbh;
1724             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1725             my $sth =
1726               $dbh->prepare(
1727 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1728               );
1729             warn "$left / $operator / $right\n" if $DEBUG;
1730
1731             # split each word, query the DB and build the biblionumbers result
1732             #sanitizing leftpart
1733             $left =~ s/^\s+|\s+$//;
1734             foreach ( split / /, $right ) {
1735                 my $biblionumbers;
1736                 $_ =~ s/^\s+|\s+$//;
1737                 next unless $_;
1738                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1739                 $sth->execute( $server, $left, $_ )
1740                   or warn "execute failed: $!";
1741                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1742
1743 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1744 # otherwise, fill the result
1745                     $biblionumbers .= $line
1746                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1747                     warn "result : $value "
1748                       . ( $right  =~ /\d/ ) . "=="
1749                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1750                 }
1751
1752 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1753                 if ($results) {
1754                     warn "NZAND" if $DEBUG;
1755                     $results = NZoperatorAND($biblionumbers,$results);
1756                 }
1757                 else {
1758                     $results = $biblionumbers;
1759                 }
1760             }
1761         }
1762         else {
1763
1764       #do a complete search (all indexes), if index='kw' do complete search too.
1765             my $dbh = C4::Context->dbh;
1766             my $sth =
1767               $dbh->prepare(
1768 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1769               );
1770
1771             # split each word, query the DB and build the biblionumbers result
1772             foreach ( split / /, $string ) {
1773                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1774                 warn "search on all indexes on $_" if $DEBUG;
1775                 my $biblionumbers;
1776                 next unless $_;
1777                 $sth->execute( $server, $_ );
1778                 while ( my $line = $sth->fetchrow ) {
1779                     $biblionumbers .= $line;
1780                 }
1781
1782 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1783                 if ($results) {
1784                     $results = NZoperatorAND($biblionumbers,$results);
1785                 }
1786                 else {
1787                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1788                     $results = $biblionumbers;
1789                 }
1790             }
1791         }
1792         warn "return : $results for LEAF : $string" if $DEBUG;
1793         return $results;
1794     }
1795     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1796 }
1797
1798 sub NZoperatorAND{
1799     my ($rightresult, $leftresult)=@_;
1800
1801     my @leftresult = split /;/, $leftresult;
1802     warn " @leftresult / $rightresult \n" if $DEBUG;
1803
1804     #             my @rightresult = split /;/,$leftresult;
1805     my $finalresult;
1806
1807 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1808 # the result is stored twice, to have the same weight for AND than OR.
1809 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1810 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1811     foreach (@leftresult) {
1812         my $value = $_;
1813         my $countvalue;
1814         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1815         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1816             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1817             $finalresult .=
1818                 "$value-$countvalue;$value-$countvalue;";
1819         }
1820     }
1821     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1822     return $finalresult;
1823 }
1824
1825 sub NZoperatorOR{
1826     my ($rightresult, $leftresult)=@_;
1827     return $rightresult.$leftresult;
1828 }
1829
1830 sub NZoperatorNOT{
1831     my ($leftresult, $rightresult)=@_;
1832
1833     my @leftresult = split /;/, $leftresult;
1834
1835     #             my @rightresult = split /;/,$leftresult;
1836     my $finalresult;
1837     foreach (@leftresult) {
1838         my $value=$_;
1839         $value=$1 if $value=~m/(.*)-\d+$/;
1840         unless ($rightresult =~ "$value-") {
1841             $finalresult .= "$_;";
1842         }
1843     }
1844     return $finalresult;
1845 }
1846
1847 =head2 NZorder
1848
1849   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1850
1851   TODO :: Description
1852
1853 =cut
1854
1855 sub NZorder {
1856     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1857     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1858
1859     # order title asc by default
1860     #     $ordering = '1=36 <i' unless $ordering;
1861     $results_per_page = 20 unless $results_per_page;
1862     $offset           = 0  unless $offset;
1863     my $dbh = C4::Context->dbh;
1864
1865     #
1866     # order by POPULARITY
1867     #
1868     if ( $ordering =~ /popularity/ ) {
1869         my %result;
1870         my %popularity;
1871
1872         # popularity is not in MARC record, it's builded from a specific query
1873         my $sth =
1874           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1875         foreach ( split /;/, $biblionumbers ) {
1876             my ( $biblionumber, $title ) = split /,/, $_;
1877             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1878             $sth->execute($biblionumber);
1879             my $popularity = $sth->fetchrow || 0;
1880
1881 # hint : the key is popularity.title because we can have
1882 # many results with the same popularity. In this cas, sub-ordering is done by title
1883 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1884 # (un-frequent, I agree, but we won't forget anything that way ;-)
1885             $popularity{ sprintf( "%10d", $popularity ) . $title
1886                   . $biblionumber } = $biblionumber;
1887         }
1888
1889     # sort the hash and return the same structure as GetRecords (Zebra querying)
1890         my $result_hash;
1891         my $numbers = 0;
1892         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1893             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1894                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1895                   $result{ $popularity{$key} }->as_usmarc();
1896             }
1897         }
1898         else {                                    # sort popularity ASC
1899             foreach my $key ( sort ( keys %popularity ) ) {
1900                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1901                   $result{ $popularity{$key} }->as_usmarc();
1902             }
1903         }
1904         my $finalresult = ();
1905         $result_hash->{'hits'}         = $numbers;
1906         $finalresult->{'biblioserver'} = $result_hash;
1907         return $finalresult;
1908
1909         #
1910         # ORDER BY author
1911         #
1912     }
1913     elsif ( $ordering =~ /author/ ) {
1914         my %result;
1915         foreach ( split /;/, $biblionumbers ) {
1916             my ( $biblionumber, $title ) = split /,/, $_;
1917             my $record = GetMarcBiblio($biblionumber);
1918             my $author;
1919             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1920                 $author = $record->subfield( '200', 'f' );
1921                 $author = $record->subfield( '700', 'a' ) unless $author;
1922             }
1923             else {
1924                 $author = $record->subfield( '100', 'a' );
1925             }
1926
1927 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1928 # and we don't want to get only 1 result for each of them !!!
1929             $result{ $author . $biblionumber } = $record;
1930         }
1931
1932     # sort the hash and return the same structure as GetRecords (Zebra querying)
1933         my $result_hash;
1934         my $numbers = 0;
1935         if ( $ordering eq 'author_za' ) {    # sort by author desc
1936             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1937                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1938                   $result{$key}->as_usmarc();
1939             }
1940         }
1941         else {                               # sort by author ASC
1942             foreach my $key ( sort ( keys %result ) ) {
1943                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1944                   $result{$key}->as_usmarc();
1945             }
1946         }
1947         my $finalresult = ();
1948         $result_hash->{'hits'}         = $numbers;
1949         $finalresult->{'biblioserver'} = $result_hash;
1950         return $finalresult;
1951
1952         #
1953         # ORDER BY callnumber
1954         #
1955     }
1956     elsif ( $ordering =~ /callnumber/ ) {
1957         my %result;
1958         foreach ( split /;/, $biblionumbers ) {
1959             my ( $biblionumber, $title ) = split /,/, $_;
1960             my $record = GetMarcBiblio($biblionumber);
1961             my $callnumber;
1962             my $frameworkcode = GetFrameworkCode($biblionumber);
1963             my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField(  'items.itemcallnumber', $frameworkcode);
1964                ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
1965                 unless $callnumber_tag;
1966             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1967                 $callnumber = $record->subfield( '200', 'f' );
1968             } else {
1969                 $callnumber = $record->subfield( '100', 'a' );
1970             }
1971
1972 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1973 # and we don't want to get only 1 result for each of them !!!
1974             $result{ $callnumber . $biblionumber } = $record;
1975         }
1976
1977     # sort the hash and return the same structure as GetRecords (Zebra querying)
1978         my $result_hash;
1979         my $numbers = 0;
1980         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1981             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1982                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1983                   $result{$key}->as_usmarc();
1984             }
1985         }
1986         else {                                     # sort by title ASC
1987             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1988                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1989                   $result{$key}->as_usmarc();
1990             }
1991         }
1992         my $finalresult = ();
1993         $result_hash->{'hits'}         = $numbers;
1994         $finalresult->{'biblioserver'} = $result_hash;
1995         return $finalresult;
1996     }
1997     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1998         my %result;
1999         foreach ( split /;/, $biblionumbers ) {
2000             my ( $biblionumber, $title ) = split /,/, $_;
2001             my $record = GetMarcBiblio($biblionumber);
2002             my ( $publicationyear_tag, $publicationyear_subfield ) =
2003               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2004             my $publicationyear =
2005               $record->subfield( $publicationyear_tag,
2006                 $publicationyear_subfield );
2007
2008 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2009 # and we don't want to get only 1 result for each of them !!!
2010             $result{ $publicationyear . $biblionumber } = $record;
2011         }
2012
2013     # sort the hash and return the same structure as GetRecords (Zebra querying)
2014         my $result_hash;
2015         my $numbers = 0;
2016         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2017             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2018                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2019                   $result{$key}->as_usmarc();
2020             }
2021         }
2022         else {                                 # sort by pub year ASC
2023             foreach my $key ( sort ( keys %result ) ) {
2024                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2025                   $result{$key}->as_usmarc();
2026             }
2027         }
2028         my $finalresult = ();
2029         $result_hash->{'hits'}         = $numbers;
2030         $finalresult->{'biblioserver'} = $result_hash;
2031         return $finalresult;
2032
2033         #
2034         # ORDER BY title
2035         #
2036     }
2037     elsif ( $ordering =~ /title/ ) {
2038
2039 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2040         my %result;
2041         foreach ( split /;/, $biblionumbers ) {
2042             my ( $biblionumber, $title ) = split /,/, $_;
2043
2044 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2045 # and we don't want to get only 1 result for each of them !!!
2046 # hint & speed improvement : we can order without reading the record
2047 # so order, and read records only for the requested page !
2048             $result{ $title . $biblionumber } = $biblionumber;
2049         }
2050
2051     # sort the hash and return the same structure as GetRecords (Zebra querying)
2052         my $result_hash;
2053         my $numbers = 0;
2054         if ( $ordering eq 'title_az' ) {    # sort by title desc
2055             foreach my $key ( sort ( keys %result ) ) {
2056                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2057             }
2058         }
2059         else {                              # sort by title ASC
2060             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2061                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2062             }
2063         }
2064
2065         # limit the $results_per_page to result size if it's more
2066         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2067
2068         # for the requested page, replace biblionumber by the complete record
2069         # speed improvement : avoid reading too much things
2070         for (
2071             my $counter = $offset ;
2072             $counter <= $offset + $results_per_page ;
2073             $counter++
2074           )
2075         {
2076             $result_hash->{'RECORDS'}[$counter] =
2077               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2078         }
2079         my $finalresult = ();
2080         $result_hash->{'hits'}         = $numbers;
2081         $finalresult->{'biblioserver'} = $result_hash;
2082         return $finalresult;
2083     }
2084     else {
2085
2086 #
2087 # order by ranking
2088 #
2089 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2090         my %result;
2091         my %count_ranking;
2092         foreach ( split /;/, $biblionumbers ) {
2093             my ( $biblionumber, $title ) = split /,/, $_;
2094             $title =~ /(.*)-(\d)/;
2095
2096             # get weight
2097             my $ranking = $2;
2098
2099 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2100 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2101 # biblio N has ranking = 6
2102             $count_ranking{$biblionumber} += $ranking;
2103         }
2104
2105 # build the result by "inverting" the count_ranking hash
2106 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2107 #         warn "counting";
2108         foreach ( keys %count_ranking ) {
2109             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2110         }
2111
2112     # sort the hash and return the same structure as GetRecords (Zebra querying)
2113         my $result_hash;
2114         my $numbers = 0;
2115         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2116             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2117         }
2118
2119         # limit the $results_per_page to result size if it's more
2120         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2121
2122         # for the requested page, replace biblionumber by the complete record
2123         # speed improvement : avoid reading too much things
2124         for (
2125             my $counter = $offset ;
2126             $counter <= $offset + $results_per_page ;
2127             $counter++
2128           )
2129         {
2130             $result_hash->{'RECORDS'}[$counter] =
2131               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2132               if $result_hash->{'RECORDS'}[$counter];
2133         }
2134         my $finalresult = ();
2135         $result_hash->{'hits'}         = $numbers;
2136         $finalresult->{'biblioserver'} = $result_hash;
2137         return $finalresult;
2138     }
2139 }
2140
2141 END { }    # module clean-up code here (global destructor)
2142
2143 1;
2144 __END__
2145
2146 =head1 AUTHOR
2147
2148 Koha Developement team <info@koha.org>
2149
2150 =cut