C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  17
  18 use Modern::Perl;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  22 use C4::Koha;      # getFacets
  23 use Koha::DateUtils;
  24 use Koha::Libraries;
  25 use Lingua::Stem;
  26 use C4::Search::PazPar2;
  27 use XML::Simple;
  28 use C4::XSLT;
  29 use C4::Reserves;    # GetReserveStatus
  30 use C4::Debug;
  31 use C4::Charset;
  32 use Koha::AuthorisedValues;
  33 use Koha::ItemTypes;
  34 use Koha::Libraries;
  35 use Koha::Patrons;
  36 use Koha::RecordProcessor;
  37 use YAML;
  38 use URI::Escape;
  39 use Business::ISBN;
  40 use MARC::Record;
  41 use MARC::Field;
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  43
  44 BEGIN {
  45     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  46 }
  47
  48 =head1 NAME
  49
  50 C4::Search - Functions for searching the Koha catalog.
  51
  52 =head1 SYNOPSIS
  53
  54 See opac/opac-search.pl or catalogue/search.pl for example of usage
  55
  56 =head1 DESCRIPTION
  57
  58 This module provides searching functions for Koha's bibliographic databases
  59
  60 =head1 FUNCTIONS
  61
  62 =cut
  63
  64 @ISA    = qw(Exporter);
  65 @EXPORT = qw(
  66   &FindDuplicate
  67   &SimpleSearch
  68   &searchResults
  69   &getRecords
  70   &buildQuery
  71   &GetDistinctValues
  72   &enabled_staff_search_views
  73 );
  74
  75 # make all your functions, whether exported or not;
  76
  77 =head2 FindDuplicate
  78
  79 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  80
  81 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  82
  83 =cut
  84
  85 sub FindDuplicate {
  86     my ($record) = @_;
  87     my $dbh = C4::Context->dbh;
  88     my $result = TransformMarcToKoha( $record, '' );
  89     my $sth;
  90     my $query;
  91     my $search;
  92     my $type;
  93     my ( $biblionumber, $title );
  94
  95     # search duplicate on ISBN, easy and fast..
  96     # ... normalize first
  97     if ( $result->{isbn} ) {
  98         $result->{isbn} =~ s/\(.*$//;
  99         $result->{isbn} =~ s/\s+$//;
 100         $query = "isbn:$result->{isbn}";
 101     }
 102     else {
 103
 104         my $titleindex = 'ti,ext';
 105         my $authorindex = 'au,ext';
 106         my $op = 'and';
 107
 108         $result->{title} =~ s /\\//g;
 109         $result->{title} =~ s /\"//g;
 110         $result->{title} =~ s /\(//g;
 111         $result->{title} =~ s /\)//g;
 112
 113         # FIXME: instead of removing operators, could just do
 114         # quotes around the value
 115         $result->{title} =~ s/(and|or|not)//g;
 116         $query = "$titleindex:\"$result->{title}\"";
 117         if   ( $result->{author} ) {
 118             $result->{author} =~ s /\\//g;
 119             $result->{author} =~ s /\"//g;
 120             $result->{author} =~ s /\(//g;
 121             $result->{author} =~ s /\)//g;
 122
 123             # remove valid operators
 124             $result->{author} =~ s/(and|or|not)//g;
 125             $query .= " $op $authorindex:\"$result->{author}\"";
 126         }
 127     }
 128
 129     my ( $error, $searchresults, undef ) = SimpleSearch($query); # FIXME :: hardcoded !
 130     my @results;
 131     if (!defined $error) {
 132         foreach my $possible_duplicate_record (@{$searchresults}) {
 133             my $marcrecord = new_record_from_zebra(
 134                 'biblioserver',
 135                 $possible_duplicate_record
 136             );
 137
 138             my $result = TransformMarcToKoha( $marcrecord, '' );
 139
 140             # FIXME :: why 2 $biblionumber ?
 141             if ($result) {
 142                 push @results, $result->{'biblionumber'};
 143                 push @results, $result->{'title'};
 144             }
 145         }
 146     }
 147     return @results;
 148 }
 149
 150 =head2 SimpleSearch
 151
 152 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] );
 153
 154 This function provides a simple search API on the bibliographic catalog
 155
 156 =over 2
 157
 158 =item C<input arg:>
 159
 160     * $query can be a simple keyword or a complete CCL query
 161     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 162     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 163     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 164     * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = )
 165
 166
 167 =item C<Return:>
 168
 169     Returns an array consisting of three elements
 170     * $error is undefined unless an error is detected
 171     * $results is a reference to an array of records.
 172     * $total_hits is the number of hits that would have been returned with no limit
 173
 174     If an error is returned the two other return elements are undefined. If error itself is undefined
 175     the other two elements are always defined
 176
 177 =item C<usage in the script:>
 178
 179 =back
 180
 181 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 182
 183 if (defined $error) {
 184     $template->param(query_error => $error);
 185     warn "error: ".$error;
 186     output_html_with_http_headers $input, $cookie, $template->output;
 187     exit;
 188 }
 189
 190 my $hits = @{$marcresults};
 191 my @results;
 192
 193 for my $r ( @{$marcresults} ) {
 194     my $marcrecord = MARC::File::USMARC::decode($r);
 195     my $biblio = TransformMarcToKoha($marcrecord,q{});
 196
 197     #build the iarray of hashs for the template.
 198     push @results, {
 199         title           => $biblio->{'title'},
 200         subtitle        => $biblio->{'subtitle'},
 201         biblionumber    => $biblio->{'biblionumber'},
 202         author          => $biblio->{'author'},
 203         publishercode   => $biblio->{'publishercode'},
 204         publicationyear => $biblio->{'publicationyear'},
 205         };
 206
 207 }
 208
 209 $template->param(result=>\@results);
 210
 211 =cut
 212
 213 sub SimpleSearch {
 214     my ( $query, $offset, $max_results, $servers, %options )  = @_;
 215
 216     return ( 'No query entered', undef, undef ) unless $query;
 217     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 218     my @servers = defined ( $servers ) ? @$servers : ( 'biblioserver' );
 219     my @zoom_queries;
 220     my @tmpresults;
 221     my @zconns;
 222     my $results = [];
 223     my $total_hits = 0;
 224
 225     # Initialize & Search Zebra
 226     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 227         eval {
 228             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 229             $query =~ s/:/=/g unless $options{skip_normalize};
 230             $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 231             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 232
 233             # error handling
 234             my $error =
 235                 $zconns[$i]->errmsg() . " ("
 236               . $zconns[$i]->errcode() . ") "
 237               . $zconns[$i]->addinfo() . " "
 238               . $zconns[$i]->diagset();
 239
 240             return ( $error, undef, undef ) if $zconns[$i]->errcode();
 241         };
 242         if ($@) {
 243
 244             # caught a ZOOM::Exception
 245             my $error =
 246                 $@->message() . " ("
 247               . $@->code() . ") "
 248               . $@->addinfo() . " "
 249               . $@->diagset();
 250             warn $error." for query: $query";
 251             return ( $error, undef, undef );
 252         }
 253     }
 254
 255     _ZOOM_event_loop(
 256         \@zconns,
 257         \@tmpresults,
 258         sub {
 259             my ($i, $size) = @_;
 260             my $first_record = defined($offset) ? $offset + 1 : 1;
 261             my $hits = $tmpresults[ $i - 1 ]->size();
 262             $total_hits += $hits;
 263             my $last_record = $hits;
 264             if ( defined $max_results && $offset + $max_results < $hits ) {
 265                 $last_record = $offset + $max_results;
 266             }
 267
 268             for my $j ( $first_record .. $last_record ) {
 269                 my $record = eval {
 270                   $tmpresults[ $i - 1 ]->record( $j - 1 )->raw()
 271                   ;    # 0 indexed
 272                 };
 273                 push @{$results}, $record if defined $record;
 274             }
 275         }
 276     );
 277
 278     foreach my $zoom_query (@zoom_queries) {
 279         $zoom_query->destroy();
 280     }
 281
 282     return ( undef, $results, $total_hits );
 283 }
 284
 285 =head2 getRecords
 286
 287 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 288
 289         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 290         $results_per_page, $offset,       $branches,       $itemtypes,
 291         $query_type,       $scan,         $opac
 292     );
 293
 294 The all singing, all dancing, multi-server, asynchronous, scanning,
 295 searching, record nabbing, facet-building
 296
 297 See verbose embedded documentation.
 298
 299 =cut
 300
 301 sub getRecords {
 302     my (
 303         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 304         $results_per_page, $offset,       $branches,         $itemtypes,
 305         $query_type,       $scan,         $opac
 306     ) = @_;
 307
 308     my @servers = @$servers_ref;
 309     my @sort_by = @$sort_by_ref;
 310     $offset = 0 if $offset < 0;
 311
 312     # Initialize variables for the ZOOM connection and results object
 313     my $zconn;
 314     my @zconns;
 315     my @results;
 316     my $results_hashref = ();
 317
 318     # TODO simplify this structure ( { branchcode => $branchname } is enought) and remove this parameter
 319     $branches ||= { map { $_->branchcode => { branchname => $_->branchname } } Koha::Libraries->search };
 320
 321     # Initialize variables for the faceted results objects
 322     my $facets_counter = {};
 323     my $facets_info    = {};
 324     my $facets         = getFacets();
 325
 326     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 327
 328     ### LOOP THROUGH THE SERVERS
 329     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 330         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 331
 332 # perform the search, create the results objects
 333 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 334         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 335
 336         #$query_to_use = $simple_query if $scan;
 337         warn $simple_query if ( $scan and $DEBUG );
 338
 339         # Check if we've got a query_type defined, if so, use it
 340         eval {
 341             if ($query_type) {
 342                 if ($query_type =~ /^ccl/) {
 343                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 344                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 345                 } elsif ($query_type =~ /^cql/) {
 346                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
 347                 } elsif ($query_type =~ /^pqf/) {
 348                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
 349                 } else {
 350                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 351                 }
 352             } elsif ($scan) {
 353                     $results[$i] = $zconns[$i]->scan(  new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 354             } else {
 355                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 356             }
 357         };
 358         if ($@) {
 359             warn "WARNING: query problem with $query_to_use " . $@;
 360         }
 361
 362         # Concatenate the sort_by limits and pass them to the results object
 363         # Note: sort will override rank
 364         my $sort_by;
 365         foreach my $sort (@sort_by) {
 366             if ( $sort eq "author_az" || $sort eq "author_asc" ) {
 367                 $sort_by .= "1=1003 <i ";
 368             }
 369             elsif ( $sort eq "author_za" || $sort eq "author_dsc" ) {
 370                 $sort_by .= "1=1003 >i ";
 371             }
 372             elsif ( $sort eq "popularity_asc" ) {
 373                 $sort_by .= "1=9003 <i ";
 374             }
 375             elsif ( $sort eq "popularity_dsc" ) {
 376                 $sort_by .= "1=9003 >i ";
 377             }
 378             elsif ( $sort eq "call_number_asc" ) {
 379                 $sort_by .= "1=8007  <i ";
 380             }
 381             elsif ( $sort eq "call_number_dsc" ) {
 382                 $sort_by .= "1=8007 >i ";
 383             }
 384             elsif ( $sort eq "pubdate_asc" ) {
 385                 $sort_by .= "1=31 <i ";
 386             }
 387             elsif ( $sort eq "pubdate_dsc" ) {
 388                 $sort_by .= "1=31 >i ";
 389             }
 390             elsif ( $sort eq "acqdate_asc" ) {
 391                 $sort_by .= "1=32 <i ";
 392             }
 393             elsif ( $sort eq "acqdate_dsc" ) {
 394                 $sort_by .= "1=32 >i ";
 395             }
 396             elsif ( $sort eq "title_az" || $sort eq "title_asc" ) {
 397                 $sort_by .= "1=4 <i ";
 398             }
 399             elsif ( $sort eq "title_za" || $sort eq "title_dsc" ) {
 400                 $sort_by .= "1=4 >i ";
 401             }
 402             else {
 403                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 404             }
 405         }
 406         if ( $sort_by && !$scan && $results[$i] ) {
 407             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 408                 warn "WARNING sort $sort_by failed";
 409             }
 410         }
 411     }    # finished looping through servers
 412
 413     # The big moment: asynchronously retrieve results from all servers
 414         _ZOOM_event_loop(
 415             \@zconns,
 416             \@results,
 417             sub {
 418                 my ( $i, $size ) = @_;
 419                 my $results_hash;
 420
 421                 # loop through the results
 422                 $results_hash->{'hits'} = $size;
 423                 my $times;
 424                 if ( $offset + $results_per_page <= $size ) {
 425                     $times = $offset + $results_per_page;
 426                 }
 427                 else {
 428                     $times = $size;
 429                 }
 430
 431                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 432                     my $records_hash;
 433                     my $record;
 434
 435                     ## Check if it's an index scan
 436                     if ($scan) {
 437                         my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j);
 438
 439                  # here we create a minimal MARC record and hand it off to the
 440                  # template just like a normal result ... perhaps not ideal, but
 441                  # it works for now
 442                         my $tmprecord = MARC::Record->new();
 443                         $tmprecord->encoding('UTF-8');
 444                         my $tmptitle;
 445                         my $tmpauthor;
 446
 447                 # the minimal record in author/title (depending on MARC flavour)
 448                         if ( C4::Context->preference("marcflavour") eq
 449                             "UNIMARC" )
 450                         {
 451                             $tmptitle = MARC::Field->new(
 452                                 '200', ' ', ' ',
 453                                 a => $term,
 454                                 f => $occ
 455                             );
 456                             $tmprecord->append_fields($tmptitle);
 457                         }
 458                         else {
 459                             $tmptitle =
 460                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 461                             $tmpauthor =
 462                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 463                             $tmprecord->append_fields($tmptitle);
 464                             $tmprecord->append_fields($tmpauthor);
 465                         }
 466                         $results_hash->{'RECORDS'}[$j] =
 467                           $tmprecord->as_usmarc();
 468                     }
 469
 470                     # not an index scan
 471                     else {
 472                         $record = $results[ $i - 1 ]->record($j)->raw();
 473                         # warn "RECORD $j:".$record;
 474                         $results_hash->{'RECORDS'}[$j] = $record;
 475                     }
 476
 477                 }
 478                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 479
 480                 # Fill the facets while we're looping, but only for the
 481                 # biblioserver and not for a scan
 482                 if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
 483                     $facets_counter = GetFacets( $results[ $i - 1 ] );
 484                     $facets_info    = _get_facets_info( $facets );
 485                 }
 486
 487                 # BUILD FACETS
 488                 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 489                     for my $link_value (
 490                         sort { $a cmp $b } keys %$facets_counter
 491                       )
 492                     {
 493                         my @this_facets_array;
 494                         for my $one_facet (
 495                             sort {
 496                                 $facets_counter->{$link_value}
 497                                   ->{$b} <=> $facets_counter->{$link_value}
 498                                   ->{$a}
 499                             } keys %{ $facets_counter->{$link_value} }
 500                           )
 501                         {
 502 # Sanitize the link value : parenthesis, question and exclamation mark will cause errors with CCL
 503                             my $facet_link_value = $one_facet;
 504                             $facet_link_value =~ s/[()!?¡¿؟]/ /g;
 505
 506                             # fix the length that will display in the label,
 507                             my $facet_label_value = $one_facet;
 508                             my $facet_max_length  = C4::Context->preference(
 509                                 'FacetLabelTruncationLength')
 510                               || 20;
 511                             $facet_label_value =
 512                               substr( $one_facet, 0, $facet_max_length )
 513                               . "..."
 514                               if length($facet_label_value) >
 515                                   $facet_max_length;
 516
 517                         # if it's a branch, label by the name, not the code,
 518                             if ( $link_value =~ /branch/ ) {
 519                                 if (   defined $branches
 520                                     && ref($branches) eq "HASH"
 521                                     && defined $branches->{$one_facet}
 522                                     && ref( $branches->{$one_facet} ) eq
 523                                     "HASH" )
 524                                 {
 525                                     $facet_label_value =
 526                                       $branches->{$one_facet}
 527                                       ->{'branchname'};
 528                                 }
 529                                 else {
 530                                     $facet_label_value = "*";
 531                                 }
 532                             }
 533
 534                       # if it's a itemtype, label by the name, not the code,
 535                             if ( $link_value =~ /itype/ ) {
 536                                 if (   defined $itemtypes
 537                                     && ref($itemtypes) eq "HASH"
 538                                     && defined $itemtypes->{$one_facet}
 539                                     && ref( $itemtypes->{$one_facet} ) eq
 540                                     "HASH" )
 541                                 {
 542                                     $facet_label_value =
 543                                       $itemtypes->{$one_facet}
 544                                       ->{translated_description};
 545                                 }
 546                             }
 547
 548            # also, if it's a location code, use the name instead of the code
 549                             if ( $link_value =~ /location/ ) {
 550                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 551                                 my $av = Koha::AuthorisedValues->search({ category => 'LOC', authorised_value => $one_facet });
 552                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 553                             }
 554
 555                             # also, if it's a collection code, use the name instead of the code
 556                             if ( $link_value =~ /ccode/ ) {
 557                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 558                                 my $av = Koha::AuthorisedValues->search({ category => 'CCODE', authorised_value => $one_facet });
 559                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 560                             }
 561
 562             # but we're down with the whole label being in the link's title.
 563                             push @this_facets_array,
 564                               {
 565                                 facet_count =>
 566                                   $facets_counter->{$link_value}
 567                                   ->{$one_facet},
 568                                 facet_label_value => $facet_label_value,
 569                                 facet_title_value => $one_facet,
 570                                 facet_link_value  => $facet_link_value,
 571                                 type_link_value   => $link_value,
 572                               }
 573                               if ($facet_label_value);
 574                         }
 575
 576                         push @facets_loop,
 577                           {
 578                             type_link_value => $link_value,
 579                             type_id         => $link_value . "_id",
 580                             "type_label_"
 581                               . $facets_info->{$link_value}->{'label_value'} =>
 582                               1,
 583                             facets     => \@this_facets_array,
 584                           }
 585                           unless (
 586                             (
 587                                 $facets_info->{$link_value}->{'label_value'} =~
 588                                 /Libraries/
 589                             )
 590                             and ( Koha::Libraries->search->count == 1 )
 591                           );
 592                     }
 593                 }
 594             }
 595         );
 596
 597     # This sorts the facets into alphabetical order
 598     if (@facets_loop) {
 599         foreach my $f (@facets_loop) {
 600             $f->{facets} = [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ];
 601         }
 602         @facets_loop = sort {defined $a->{expand} && defined $b->{expand} && $a->{expand} cmp $b->{expand}} @facets_loop;
 603     }
 604
 605     return ( undef, $results_hashref, \@facets_loop );
 606 }
 607
 608 sub GetFacets {
 609
 610     my $rs = shift;
 611     my $facets;
 612
 613     my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0;
 614
 615     if ( $use_zebra_facets ) {
 616         $facets = _get_facets_from_zebra( $rs );
 617     } else {
 618         $facets = _get_facets_from_records( $rs );
 619     }
 620
 621     return $facets;
 622 }
 623
 624 sub _get_facets_from_records {
 625
 626     my $rs = shift;
 627
 628     my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20;
 629     my $facets_config  = getFacets();
 630     my $facets         = {};
 631     my $size           = $rs->size();
 632     my $jmax           = $size > $facets_maxrecs
 633                             ? $facets_maxrecs
 634                             : $size;
 635
 636     for ( my $j = 0 ; $j < $jmax ; $j++ ) {
 637
 638         my $marc_record = new_record_from_zebra (
 639                 'biblioserver',
 640                 $rs->record( $j )->raw()
 641         );
 642
 643         if ( ! defined $marc_record ) {
 644             warn "ERROR DECODING RECORD - $@: " .
 645                 $rs->record( $j )->raw();
 646             next;
 647         }
 648
 649         _get_facets_data_from_record( $marc_record, $facets_config, $facets );
 650     }
 651
 652     return $facets;
 653 }
 654
 655 =head2 _get_facets_data_from_record
 656
 657     C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
 658
 659 Internal function that extracts facets information from a MARC::Record object
 660 and populates $facets_counter for using in getRecords.
 661
 662 $facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured
 663 facets for Zebra).
 664
 665 =cut
 666
 667 sub _get_facets_data_from_record {
 668
 669     my ( $marc_record, $facets, $facets_counter ) = @_;
 670
 671     for my $facet (@$facets) {
 672
 673         my @used_datas = ();
 674
 675         foreach my $tag ( @{ $facet->{ tags } } ) {
 676
 677             # tag number is the first three digits
 678             my $tag_num          = substr( $tag, 0, 3 );
 679             # subfields are the remainder
 680             my $subfield_letters = substr( $tag, 3 );
 681
 682             my @fields = $marc_record->field( $tag_num );
 683             foreach my $field (@fields) {
 684                 # If $field->indicator(1) eq 'z', it means it is a 'see from'
 685                 # field introduced because of IncludeSeeFromInSearches, so skip it
 686                 next if $field->indicator(1) eq 'z';
 687
 688                 my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
 689
 690                 unless ( grep { $_ eq $data } @used_datas ) {
 691                     push @used_datas, $data;
 692                     $facets_counter->{ $facet->{ idx } }->{ $data }++;
 693                 }
 694             }
 695         }
 696     }
 697 }
 698
 699 =head2 _get_facets_from_zebra
 700
 701     my $facets = _get_facets_from_zebra( $result_set )
 702
 703 Retrieves facets for a specified result set. It loops through the facets defined
 704 in C4::Koha::getFacets and returns a hash with the following structure:
 705
 706    {  facet_idx => {
 707             facet_value => count
 708       },
 709       ...
 710    }
 711
 712 =cut
 713
 714 sub _get_facets_from_zebra {
 715
 716     my $rs = shift;
 717
 718     # save current elementSetName
 719     my $elementSetName = $rs->option( 'elementSetName' );
 720
 721     my $facets_loop = getFacets();
 722     my $facets_data  = {};
 723     # loop through defined facets and fill the facets hashref
 724     foreach my $facet ( @$facets_loop ) {
 725
 726         my $idx = $facet->{ idx };
 727         my $sep = $facet->{ sep };
 728         my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep );
 729         if ( $facet_values ) {
 730             # we've actually got a result
 731             $facets_data->{ $idx } = $facet_values;
 732         }
 733     }
 734     # set elementSetName to its previous value to avoid side effects
 735     $rs->option( elementSetName => $elementSetName );
 736
 737     return $facets_data;
 738 }
 739
 740 =head2 _get_facet_from_result_set
 741
 742     my $facet_values =
 743         C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
 744
 745 Internal function that extracts facet information for a specific index ($facet_idx) and
 746 returns a hash containing facet values and count:
 747
 748     {
 749         $facet_value => $count ,
 750         ...
 751     }
 752
 753 Warning: this function has the side effect of changing the elementSetName for the result
 754 set. It is a helper function for the main loop, which takes care of backing it up for
 755 restoring.
 756
 757 =cut
 758
 759 sub _get_facet_from_result_set {
 760
 761     my $facet_idx = shift;
 762     my $rs        = shift;
 763     my $sep       = shift;
 764
 765     my $internal_sep  = '<*>';
 766     my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
 767
 768     return if ( ! defined $facet_idx || ! defined $rs );
 769     # zebra's facet element, untokenized index
 770     my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
 771     # configure zebra results for retrieving the desired facet
 772     $rs->option( elementSetName => $facet_element );
 773     # get the facet record from result set
 774     my $facet = $rs->record( 0 )->raw;
 775     # if the facet has no restuls...
 776     return if !defined $facet;
 777     # TODO: benchmark DOM vs. SAX performance
 778     my $facet_dom = XML::LibXML->load_xml(
 779       string => ($facet)
 780     );
 781     my @terms = $facet_dom->getElementsByTagName('term');
 782     return if ! @terms;
 783
 784     my $facets = {};
 785     foreach my $term ( @terms ) {
 786         my $facet_value = $term->textContent;
 787         $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
 788         $facets->{ $facet_value } = $term->getAttribute( 'occur' );
 789     }
 790
 791     return $facets;
 792 }
 793
 794 =head2 _get_facets_info
 795
 796     my $facets_info = C4::Search::_get_facets_info( $facets )
 797
 798 Internal function that extracts facets information and properly builds
 799 the data structure needed to render facet labels.
 800
 801 =cut
 802
 803 sub _get_facets_info {
 804
 805     my $facets = shift;
 806
 807     my $facets_info = {};
 808
 809     for my $facet ( @$facets ) {
 810         $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label };
 811     }
 812
 813     return $facets_info;
 814 }
 815
 816 sub pazGetRecords {
 817     my (
 818         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 819         $results_per_page, $offset,       $branches,       $query_type,
 820         $scan
 821     ) = @_;
 822     #NOTE: Parameter $branches is not used here !
 823
 824     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 825     $paz->init();
 826     $paz->search($simple_query);
 827     sleep 1;   # FIXME: WHY?
 828
 829     # do results
 830     my $results_hashref = {};
 831     my $stats = XMLin($paz->stat);
 832     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 833
 834     # for a grouped search result, the number of hits
 835     # is the number of groups returned; 'bib_hits' will have
 836     # the total number of bibs.
 837     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 838     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 839
 840     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 841         my $recid = $hit->{recid}->[0];
 842
 843         my $work_title = $hit->{'md-work-title'}->[0];
 844         my $work_author;
 845         if (exists $hit->{'md-work-author'}) {
 846             $work_author = $hit->{'md-work-author'}->[0];
 847         }
 848         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 849
 850         my $result_group = {};
 851         $result_group->{'group_label'} = $group_label;
 852         $result_group->{'group_merge_key'} = $recid;
 853
 854         my $count = 1;
 855         if (exists $hit->{count}) {
 856             $count = $hit->{count}->[0];
 857         }
 858         $result_group->{'group_count'} = $count;
 859
 860         for (my $i = 0; $i < $count; $i++) {
 861             # FIXME -- may need to worry about diacritics here
 862             my $rec = $paz->record($recid, $i);
 863             push @{ $result_group->{'RECORDS'} }, $rec;
 864         }
 865
 866         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 867     }
 868
 869     # pass through facets
 870     my $termlist_xml = $paz->termlist('author,subject');
 871     my $terms = XMLin($termlist_xml, forcearray => 1);
 872     my @facets_loop = ();
 873     #die Dumper($results);
 874 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 875 #        my @facets = ();
 876 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 877 #            push @facets, {
 878 #                facet_label_value => $facet->{'name'}->[0],
 879 #            };
 880 #        }
 881 #        push @facets_loop, ( {
 882 #            type_label => $list,
 883 #            facets => \@facets,
 884 #        } );
 885 #    }
 886
 887     return ( undef, $results_hashref, \@facets_loop );
 888 }
 889
 890 # TRUNCATION
 891 sub _detect_truncation {
 892     my ( $operand, $index ) = @_;
 893     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 894         @regexpr );
 895     $operand =~ s/^ //g;
 896     my @wordlist = split( /\s/, $operand );
 897     foreach my $word (@wordlist) {
 898         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 899             push @rightlefttruncated, $word;
 900         }
 901         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 902             push @lefttruncated, $word;
 903         }
 904         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 905             push @righttruncated, $word;
 906         }
 907         elsif ( index( $word, "*" ) < 0 ) {
 908             push @nontruncated, $word;
 909         }
 910         else {
 911             push @regexpr, $word;
 912         }
 913     }
 914     return (
 915         \@nontruncated,       \@righttruncated, \@lefttruncated,
 916         \@rightlefttruncated, \@regexpr
 917     );
 918 }
 919
 920 # STEMMING
 921 sub _build_stemmed_operand {
 922     my ($operand,$lang) = @_;
 923     require Lingua::Stem::Snowball ;
 924     my $stemmed_operand=q{};
 925
 926     # Stemmer needs language
 927     return $operand unless $lang;
 928
 929     # If operand contains a digit, it is almost certainly an identifier, and should
 930     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 931     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 932     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 933     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 934     return $operand if $operand =~ /\d/;
 935
 936 # FIXME: the locale should be set based on the user's language and/or search choice
 937     #warn "$lang";
 938     # Make sure we only use the first two letters from the language code
 939     $lang = lc(substr($lang, 0, 2));
 940     # The language codes for the two variants of Norwegian will now be "nb" and "nn",
 941     # none of which Lingua::Stem::Snowball can use, so we need to "translate" them
 942     if ($lang eq 'nb' || $lang eq 'nn') {
 943       $lang = 'no';
 944     }
 945     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 946                                                encoding => "UTF-8" );
 947
 948     my @words = split( / /, $operand );
 949     my @stems = $stemmer->stem(\@words);
 950     for my $stem (@stems) {
 951         $stemmed_operand .= "$stem";
 952         $stemmed_operand .= "?"
 953           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 954         $stemmed_operand .= " ";
 955     }
 956     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 957     return $stemmed_operand;
 958 }
 959
 960 # FIELD WEIGHTING
 961 sub _build_weighted_query {
 962
 963 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 964 # pretty well but could work much better if we had a smarter query parser
 965     my ( $operand, $stemmed_operand, $index ) = @_;
 966     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 967     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 968     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 969     $operand =~ s/"/ /g;    # Bug 7518: searches with quotation marks don't work
 970
 971     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 972
 973     # Keyword, or, no index specified
 974     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 975         $weighted_query .=
 976           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 977         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 978         $weighted_query .= " or Title-cover,phr,r3=\"$operand\"";    # phrase title
 979         $weighted_query .= " or ti,wrdl,r4=\"$operand\"";    # words in title
 980           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 981           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 982         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 983           if $fuzzy_enabled;    # add fuzzy, word list
 984         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 985           if ( $stemming and $stemmed_operand )
 986           ;                     # add stemming, right truncation
 987         $weighted_query .= " or wrdl,r9=\"$operand\"";
 988
 989         # embedded sorting: 0 a-z; 1 z-a
 990         # $weighted_query .= ") or (sort1,aut=1";
 991     }
 992
 993     # Barcode searches should skip this process
 994     elsif ( $index eq 'bc' ) {
 995         $weighted_query .= "bc=\"$operand\"";
 996     }
 997
 998     # Authority-number searches should skip this process
 999     elsif ( $index eq 'an' ) {
1000         $weighted_query .= "an=\"$operand\"";
1001     }
1002
1003     # If the index is numeric, don't autoquote it.
1004     elsif ( $index =~ /,st-numeric$/ ) {
1005         $weighted_query .= " $index=$operand";
1006     }
1007
1008     # If the index already has more than one qualifier, wrap the operand
1009     # in quotes and pass it back (assumption is that the user knows what they
1010     # are doing and won't appreciate us mucking up their query
1011     elsif ( $index =~ ',' ) {
1012         $weighted_query .= " $index=\"$operand\"";
1013     }
1014
1015     #TODO: build better cases based on specific search indexes
1016     else {
1017         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
1018           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
1019         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
1020         $weighted_query .= " or $index,wrdl,r6=\"$operand\"";    # word list index
1021         $weighted_query .= " or $index,wrdl,fuzzy,r8=\"$operand\""
1022           if $fuzzy_enabled;    # add fuzzy, word list
1023         $weighted_query .= " or $index,wrdl,rt,r9=\"$stemmed_operand\""
1024           if ( $stemming and $stemmed_operand );    # add stemming, right truncation
1025     }
1026
1027     $weighted_query .= "))";                       # close rank specification
1028     return $weighted_query;
1029 }
1030
1031 =head2 getIndexes
1032
1033 Return an array with available indexes.
1034
1035 =cut
1036
1037 sub getIndexes{
1038     my @indexes = (
1039                     # biblio indexes
1040                     'ab',
1041                     'Abstract',
1042                     'acqdate',
1043                     'allrecords',
1044                     'an',
1045                     'Any',
1046                     'at',
1047                     'arl',
1048                     'arp',
1049                     'au',
1050                     'aub',
1051                     'aud',
1052                     'audience',
1053                     'auo',
1054                     'aut',
1055                     'Author',
1056                     'Author-in-order ',
1057                     'Author-personal-bibliography',
1058                     'Authority-Number',
1059                     'authtype',
1060                     'bc',
1061                     'Bib-level',
1062                     'biblionumber',
1063                     'bio',
1064                     'biography',
1065                     'callnum',
1066                     'cfn',
1067                     'Chronological-subdivision',
1068                     'cn-bib-source',
1069                     'cn-bib-sort',
1070                     'cn-class',
1071                     'cn-item',
1072                     'cn-prefix',
1073                     'cn-suffix',
1074                     'cpn',
1075                     'Code-institution',
1076                     'Conference-name',
1077                     'Conference-name-heading',
1078                     'Conference-name-see',
1079                     'Conference-name-seealso',
1080                     'Content-type',
1081                     'Control-number',
1082                     'copydate',
1083                     'Corporate-name',
1084                     'Corporate-name-heading',
1085                     'Corporate-name-see',
1086                     'Corporate-name-seealso',
1087                     'Country-publication',
1088                     'ctype',
1089                     'curriculum',
1090                     'date-entered-on-file',
1091                     'Date-of-acquisition',
1092                     'Date-of-publication',
1093                     'Date-time-last-modified',
1094                     'Dewey-classification',
1095                     'Dissertation-information',
1096                     'diss',
1097                     'dtlm',
1098                     'EAN',
1099                     'extent',
1100                     'fic',
1101                     'fiction',
1102                     'Form-subdivision',
1103                     'format',
1104                     'Geographic-subdivision',
1105                     'he',
1106                     'Heading',
1107                     'Heading-use-main-or-added-entry',
1108                     'Heading-use-series-added-entry ',
1109                     'Heading-use-subject-added-entry',
1110                     'Host-item',
1111                     'id-other',
1112                     'ident',
1113                     'Identifier-standard',
1114                     'Illustration-code',
1115                     'Index-term-genre',
1116                     'Index-term-uncontrolled',
1117                     'Interest-age-level',
1118                     'Interest-grade-level',
1119                     'ISBN',
1120                     'isbn',
1121                     'ISSN',
1122                     'issn',
1123                     'itemtype',
1124                     'kw',
1125                     'Koha-Auth-Number',
1126                     'l-format',
1127                     'language',
1128                     'language-original',
1129                     'lc-card',
1130                     'LC-card-number',
1131                     'lcn',
1132                     'lex',
1133                     'lexile-number',
1134                     'llength',
1135                     'ln',
1136                     'ln-audio',
1137                     'ln-subtitle',
1138                     'Local-classification',
1139                     'Local-number',
1140                     'Match-heading',
1141                     'Match-heading-see-from',
1142                     'Material-type',
1143                     'mc-itemtype',
1144                     'mc-rtype',
1145                     'mus',
1146                     'name',
1147                     'Music-number',
1148                     'Name-geographic',
1149                     'Name-geographic-heading',
1150                     'Name-geographic-see',
1151                     'Name-geographic-seealso',
1152                     'nb',
1153                     'Note',
1154                     'notes',
1155                     'ns',
1156                     'nt',
1157                     'Other-control-number',
1158                     'pb',
1159                     'Personal-name',
1160                     'Personal-name-heading',
1161                     'Personal-name-see',
1162                     'Personal-name-seealso',
1163                     'pl',
1164                     'Place-publication',
1165                     'pn',
1166                     'popularity',
1167                     'pubdate',
1168                     'Publisher',
1169                     'Provider',
1170                     'pv',
1171                     'Reading-grade-level',
1172                     'Record-control-number',
1173                     'rcn',
1174                     'Record-type',
1175                     'rtype',
1176                     'se',
1177                     'See',
1178                     'See-also',
1179                     'sn',
1180                     'Stock-number',
1181                     'su',
1182                     'Subject',
1183                     'Subject-heading-thesaurus',
1184                     'Subject-name-personal',
1185                     'Subject-subdivision',
1186                     'Summary',
1187                     'Suppress',
1188                     'su-geo',
1189                     'su-na',
1190                     'su-to',
1191                     'su-ut',
1192                     'ut',
1193                     'Term-genre-form',
1194                     'Term-genre-form-heading',
1195                     'Term-genre-form-see',
1196                     'Term-genre-form-seealso',
1197                     'ti',
1198                     'Title',
1199                     'Title-cover',
1200                     'Title-series',
1201                     'Title-uniform',
1202                     'Title-uniform-heading',
1203                     'Title-uniform-see',
1204                     'Title-uniform-seealso',
1205                     'totalissues',
1206                     'yr',
1207
1208                     # items indexes
1209                     'acqsource',
1210                     'barcode',
1211                     'bc',
1212                     'branch',
1213                     'ccode',
1214                     'classification-source',
1215                     'cn-sort',
1216                     'coded-location-qualifier',
1217                     'copynumber',
1218                     'damaged',
1219                     'datelastborrowed',
1220                     'datelastseen',
1221                     'holdingbranch',
1222                     'homebranch',
1223                     'issues',
1224                     'item',
1225                     'itemnumber',
1226                     'itype',
1227                     'Local-classification',
1228                     'location',
1229                     'lost',
1230                     'materials-specified',
1231                     'mc-ccode',
1232                     'mc-itype',
1233                     'mc-loc',
1234                     'notforloan',
1235                     'Number-local-acquisition',
1236                     'onloan',
1237                     'price',
1238                     'renewals',
1239                     'replacementprice',
1240                     'replacementpricedate',
1241                     'reserves',
1242                     'restricted',
1243                     'stack',
1244                     'stocknumber',
1245                     'inv',
1246                     'uri',
1247                     'withdrawn',
1248
1249                     # subject related
1250                   );
1251
1252     return \@indexes;
1253 }
1254
1255 =head2 buildQuery
1256
1257 ( $error, $query,
1258 $simple_query, $query_cgi,
1259 $query_desc, $limit,
1260 $limit_cgi, $limit_desc,
1261 $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
1262
1263 Build queries and limits in CCL, CGI, Human,
1264 handle truncation, stemming, field weighting, fuzziness, etc.
1265
1266 See verbose embedded documentation.
1267
1268
1269 =cut
1270
1271 sub buildQuery {
1272     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
1273
1274     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
1275
1276     my $query_desc;
1277
1278     # dereference
1279     my @operators = $operators ? @$operators : ();
1280     my @indexes   = $indexes   ? @$indexes   : ();
1281     my @operands  = $operands  ? @$operands  : ();
1282     my @limits    = $limits    ? @$limits    : ();
1283     my @sort_by   = $sort_by   ? @$sort_by   : ();
1284
1285     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
1286     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
1287     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
1288     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
1289
1290     my $query        = $operands[0];
1291     my $simple_query = $operands[0];
1292
1293     # initialize the variables we're passing back
1294     my $query_cgi;
1295     my $query_type;
1296
1297     my $limit;
1298     my $limit_cgi;
1299     my $limit_desc;
1300
1301     my $cclq       = 0;
1302     my $cclindexes = getIndexes();
1303     if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
1304         while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) {
1305             my $dx = lc($1);
1306             $cclq = grep { lc($_) eq $dx } @$cclindexes;
1307         }
1308         $query = "ccl=$query" if $cclq;
1309     }
1310
1311 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1312 # DIAGNOSTIC ONLY!!
1313     if ( $query =~ /^ccl=/ ) {
1314         my $q=$';
1315         # This is needed otherwise ccl= and &limit won't work together, and
1316         # this happens when selecting a subject on the opac-detail page
1317         @limits = grep {!/^$/} @limits;
1318         my $original_q = $q; # without available part
1319         unless ( grep { $_ eq 'available' } @limits ) {
1320             $q =~ s| and \( \(allrecords,AlwaysMatches=''\) and \(not-onloan-count,st-numeric >= 1\) and \(lost,st-numeric=0\) \)||;
1321             $original_q = $q;
1322         }
1323         if ( @limits ) {
1324             if ( grep { $_ eq 'available' } @limits ) {
1325                 $q .= q| and ( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )|;
1326                 @limits = grep {!/^available$/} @limits;
1327             }
1328             $q .= ' and '.join(' and ', @limits) if @limits;
1329         }
1330         return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $original_q, '', '', '', 'ccl' );
1331     }
1332     if ( $query =~ /^cql=/ ) {
1333         return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' );
1334     }
1335     if ( $query =~ /^pqf=/ ) {
1336         $query_desc = $';
1337         $query_cgi = "q=pqf=".uri_escape_utf8($');
1338         return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' );
1339     }
1340
1341     # pass nested queries directly
1342     # FIXME: need better handling of some of these variables in this case
1343     # Nested queries aren't handled well and this implementation is flawed and causes users to be
1344     # unable to search for anything containing () commenting out, will be rewritten for 3.4.0
1345 #    if ( $query =~ /(\(|\))/ ) {
1346 #        return (
1347 #            undef,              $query, $simple_query, $query_cgi,
1348 #            $query,             $limit, $limit_cgi,    $limit_desc,
1349 #            'ccl'
1350 #        );
1351 #    }
1352
1353 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1354 # query operands and indexes and add stemming, truncation, field weighting, etc.
1355 # Once we do so, we'll end up with a value in $query, just like if we had an
1356 # incoming $query from the user
1357     else {
1358         $query = ""
1359           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1360         my $previous_operand
1361           ;    # a flag used to keep track if there was a previous query
1362                # if there was, we can apply the current operator
1363                # for every operand
1364         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1365
1366             # COMBINE OPERANDS, INDEXES AND OPERATORS
1367             if ( ($operands[$i] // '') ne '' ) {
1368                 $operands[$i]=~s/^\s+//;
1369
1370               # A flag to determine whether or not to add the index to the query
1371                 my $indexes_set;
1372
1373 # If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
1374                 if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
1375                     $weight_fields    = 0;
1376                     $stemming         = 0;
1377                 } else {
1378                     $operands[$i] =~ s/\?/{?}/g; # need to escape question marks
1379                 }
1380                 my $operand = $operands[$i];
1381                 my $index   = $indexes[$i] || 'kw';
1382
1383                 # Add index-specific attributes
1384
1385                 #Afaik, this 'yr' condition will only ever be met in the staff client advanced search
1386                 #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
1387                 #which is processed higher up in this sub. Other than that, year searches are typically
1388                 #handled as limits which are not processed her either.
1389
1390                 # Search ranges: Date of Publication, st-numeric
1391                 if ( $index =~ /(yr|st-numeric)/ ) {
1392                     #weight_fields/relevance search causes errors with date ranges
1393                     #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
1394                     #In the case of YYYY-YYYY, it will return no results
1395                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1396                 }
1397
1398                 # Date of Acquisition
1399                 elsif ( $index =~ /acqdate/ ) {
1400                     #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
1401                     #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
1402                       #top of the results just because they have lots of item records matching that date.
1403                     #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
1404                       #irrelevant here
1405                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1406                 }
1407                 # ISBN,ISSN,Standard Number, don't need special treatment
1408                 elsif ( $index eq 'nb' || $index eq 'ns' || $index eq 'hi' ) {
1409                     (
1410                         $stemming,      $auto_truncation,
1411                         $weight_fields, $fuzzy_enabled
1412                     ) = ( 0, 0, 0, 0 );
1413
1414                     if ( $index eq 'nb' ) {
1415                         if ( C4::Context->preference("SearchWithISBNVariations") ) {
1416                             my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
1417                             $operands[$i] = $operand =  '(nb=' . join(' OR nb=', @isbns) . ')';
1418                             $indexes[$i] = $index = 'kw';
1419                         }
1420                     }
1421                 }
1422
1423                 # Set default structure attribute (word list)
1424                 my $struct_attr = q{};
1425                 unless ( $indexes_set || $index =~ /,(st-|phr|ext|wrdl)/ || $index =~ /^(nb|ns)$/ ) {
1426                     $struct_attr = ",wrdl";
1427                 }
1428
1429                 # Some helpful index variants
1430                 my $index_plus       = $index . $struct_attr . ':';
1431                 my $index_plus_comma = $index . $struct_attr . ',';
1432
1433                 if ($auto_truncation){
1434                         unless ( $index =~ /,(st-|phr|ext)/ ) {
1435                                                 #FIXME only valid with LTR scripts
1436                                                 $operand=join(" ",map{
1437                                                                                         (index($_,"*")>0?"$_":"$_*")
1438                                                                                          }split (/\s+/,$operand));
1439                                                 warn $operand if $DEBUG;
1440                                         }
1441                                 }
1442
1443                 # Detect Truncation
1444                 my $truncated_operand;
1445                 my( $nontruncated, $righttruncated, $lefttruncated,
1446                     $rightlefttruncated, $regexpr
1447                 ) = _detect_truncation( $operand, $index );
1448                 warn
1449 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1450                   if $DEBUG;
1451
1452                 # Apply Truncation
1453                 if (
1454                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1455                     scalar(@$rightlefttruncated) > 0 )
1456                 {
1457
1458                # Don't field weight or add the index to the query, we do it here
1459                     $indexes_set = 1;
1460                     undef $weight_fields;
1461                     my $previous_truncation_operand;
1462                     if (scalar @$nontruncated) {
1463                         $truncated_operand .= "$index_plus @$nontruncated ";
1464                         $previous_truncation_operand = 1;
1465                     }
1466                     if (scalar @$righttruncated) {
1467                         $truncated_operand .= "and " if $previous_truncation_operand;
1468                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1469                         $previous_truncation_operand = 1;
1470                     }
1471                     if (scalar @$lefttruncated) {
1472                         $truncated_operand .= "and " if $previous_truncation_operand;
1473                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1474                         $previous_truncation_operand = 1;
1475                     }
1476                     if (scalar @$rightlefttruncated) {
1477                         $truncated_operand .= "and " if $previous_truncation_operand;
1478                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1479                         $previous_truncation_operand = 1;
1480                     }
1481                 }
1482                 $operand = $truncated_operand if $truncated_operand;
1483                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1484
1485                 # Handle Stemming
1486                 my $stemmed_operand;
1487                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1488                                                                                 if $stemming;
1489
1490                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1491
1492                 # Handle Field Weighting
1493                 my $weighted_operand;
1494                 if ($weight_fields) {
1495                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1496                     $operand = $weighted_operand;
1497                     $indexes_set = 1;
1498                 }
1499
1500                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1501
1502                 ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({
1503                     query => $query,
1504                     query_cgi => $query_cgi,
1505                     query_desc => $query_desc,
1506                     operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '',
1507                     parsed_operand => $operand,
1508                     original_operand => $operands[$i] // '',
1509                     index => $index,
1510                     index_plus => $index_plus,
1511                     indexes_set => $indexes_set,
1512                     previous_operand => $previous_operand,
1513                 });
1514
1515             }    #/if $operands
1516         }    # /for
1517     }
1518     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1519
1520     # add limits
1521     my %group_OR_limits;
1522     my $availability_limit;
1523     foreach my $this_limit (@limits) {
1524         next unless $this_limit;
1525         if ( $this_limit =~ /available/ ) {
1526 #
1527 ## 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1528 ## In English:
1529 ## all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1530             $availability_limit .=
1531 "( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )";
1532             $limit_cgi  .= "&limit=available";
1533             $limit_desc .= "";
1534         }
1535
1536         # group_OR_limits, prefixed by mc-
1537         # OR every member of the group
1538         elsif ( $this_limit =~ /mc/ ) {
1539             my ($k,$v) = split(/:/, $this_limit,2);
1540             if ( $k !~ /mc-i(tem)?type/ ) {
1541                 # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
1542                 $this_limit =~ tr/"//d;
1543                 $this_limit = $k.":'".$v."'";
1544             }
1545
1546             $group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
1547             $limit_desc      .= " or " if $group_OR_limits{$k};
1548             $group_OR_limits{$k} .= "$this_limit";
1549             $limit_cgi       .= "&limit=" . uri_escape_utf8($this_limit);
1550             $limit_desc      .= " $this_limit";
1551         }
1552
1553         # Regular old limits
1554         else {
1555             $limit .= " and " if $limit || $query;
1556             $limit      .= "$this_limit";
1557             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1558             if ($this_limit =~ /^branch:(.+)/) {
1559                 my $branchcode = $1;
1560                 my $library = Koha::Libraries->find( $branchcode );
1561                 if (defined $library) {
1562                     $limit_desc .= " branch:" . $library->branchname;
1563                 } else {
1564                     $limit_desc .= " $this_limit";
1565                 }
1566             } else {
1567                 $limit_desc .= " $this_limit";
1568             }
1569         }
1570     }
1571     foreach my $k (keys (%group_OR_limits)) {
1572         $limit .= " and " if ( $query || $limit );
1573         $limit .= "($group_OR_limits{$k})";
1574     }
1575     if ($availability_limit) {
1576         $limit .= " and " if ( $query || $limit );
1577         $limit .= "($availability_limit)";
1578     }
1579
1580     # Normalize the query and limit strings
1581     # This is flawed , means we can't search anything with : in it
1582     # if user wants to do ccl or cql, start the query with that
1583 #    $query =~ s/:/=/g;
1584     #NOTE: We use several several different regexps here as you can't have variable length lookback assertions
1585     $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
1586     $query =~ s/(?<=(wrdl)):/=/g;
1587     $query =~ s/(?<=(trn|phr)):/=/g;
1588     $query =~ s/(?<=(st-numeric)):/=/g;
1589     $query =~ s/(?<=(st-year)):/=/g;
1590     $query =~ s/(?<=(st-date-normalized)):/=/g;
1591
1592     # Removing warnings for later substitutions
1593     $query      //= q{};
1594     $query_desc //= q{};
1595     $query_cgi  //= q{};
1596     $limit      //= q{};
1597     $limit_desc //= q{};
1598     $limit =~ s/:/=/g;
1599     for ( $query, $query_desc, $limit, $limit_desc ) {
1600         s/  +/ /g;    # remove extra spaces
1601         s/^ //g;     # remove any beginning spaces
1602         s/ $//g;     # remove any ending spaces
1603         s/==/=/g;    # remove double == from query
1604     }
1605     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1606
1607     for ($query_cgi,$simple_query) {
1608         s/"//g;
1609     }
1610     # append the limit to the query
1611     $query .= " " . $limit;
1612
1613     # Warnings if DEBUG
1614     if ($DEBUG) {
1615         warn "QUERY:" . $query;
1616         warn "QUERY CGI:" . $query_cgi;
1617         warn "QUERY DESC:" . $query_desc;
1618         warn "LIMIT:" . $limit;
1619         warn "LIMIT CGI:" . $limit_cgi;
1620         warn "LIMIT DESC:" . $limit_desc;
1621         warn "---------\nLeave buildQuery\n---------";
1622     }
1623
1624     return (
1625         undef,              $query, $simple_query, $query_cgi,
1626         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1627         $query_type
1628     );
1629 }
1630
1631 =head2 _build_initial_query
1632
1633   ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params);
1634
1635   Build a section of the initial query containing indexes, operators, and operands.
1636
1637 =cut
1638
1639 sub _build_initial_query {
1640     my ($params) = @_;
1641
1642     my $operator = "";
1643     if ($params->{previous_operand}){
1644         #If there is a previous operand, add a supplied operator or the default 'and'
1645         $operator = ($params->{operator}) ? " ".($params->{operator})." " : ' and ';
1646     }
1647
1648     #NOTE: indexes_set is typically set when doing truncation or field weighting
1649     my $operand = ($params->{indexes_set}) ? $params->{parsed_operand} : $params->{index_plus}.$params->{parsed_operand};
1650
1651     #e.g. "kw,wrdl:test"
1652     #e.g. " and kw,wrdl:test"
1653     $params->{query} .= $operator . $operand;
1654
1655     $params->{query_cgi} .= "&op=".uri_escape_utf8($operator) if $operator;
1656     $params->{query_cgi} .= "&idx=".uri_escape_utf8($params->{index}) if $params->{index};
1657     $params->{query_cgi} .= "&q=".uri_escape_utf8($params->{original_operand}) if $params->{original_operand};
1658
1659     #e.g. " and kw,wrdl: test"
1660     $params->{query_desc} .= $operator . ( $params->{index_plus} // q{} ) . " " . ( $params->{original_operand} // q{} );
1661
1662     $params->{previous_operand} = 1 unless $params->{previous_operand}; #If there is no previous operand, mark this as one
1663
1664     return ($params->{query}, $params->{query_cgi}, $params->{query_desc}, $params->{previous_operand});
1665 }
1666
1667 =head2 searchResults
1668
1669   my @search_results = searchResults($search_context, $searchdesc, $hits,
1670                                      $results_per_page, $offset, $scan,
1671                                      @marcresults);
1672
1673 Format results in a form suitable for passing to the template
1674
1675 =cut
1676
1677 # IMO this subroutine is pretty messy still -- it's responsible for
1678 # building the HTML output for the template
1679 sub searchResults {
1680     my ( $search_context, $searchdesc, $hits, $results_per_page, $offset, $scan, $marcresults, $xslt_variables ) = @_;
1681     my $dbh = C4::Context->dbh;
1682     my @newresults;
1683
1684     require C4::Items;
1685
1686     $search_context->{'interface'} = 'opac' if !$search_context->{'interface'} || $search_context->{'interface'} ne 'intranet';
1687     my ($is_opac, $hidelostitems);
1688     if ($search_context->{'interface'} eq 'opac') {
1689         $hidelostitems = C4::Context->preference('hidelostitems');
1690         $is_opac       = 1;
1691     }
1692
1693     my $record_processor = Koha::RecordProcessor->new({
1694         filters => 'ViewPolicy'
1695     });
1696
1697     #Build branchnames hash
1698     my %branches = map { $_->branchcode => $_->branchname } Koha::Libraries->search({}, { order_by => 'branchname' });
1699
1700 # FIXME - We build an authorised values hash here, using the default framework
1701 # though it is possible to have different authvals for different fws.
1702
1703     my $shelflocations =
1704       { map { $_->{authorised_value} => $_->{lib} } Koha::AuthorisedValues->get_descriptions_by_koha_field( { frameworkcode => '', kohafield => 'items.location' } ) };
1705
1706     # get notforloan authorised value list (see $shelflocations  FIXME)
1707     my $av = Koha::MarcSubfieldStructures->search({ frameworkcode => '', kohafield => 'items.notforloan', authorised_value => [ -and => {'!=' => undef }, {'!=' => ''}] });
1708     my $notforloan_authorised_value = $av->count ? $av->next->authorised_value : undef;
1709
1710     #Get itemtype hash
1711     my $itemtypes = Koha::ItemTypes->search_with_localization;
1712     my %itemtypes = map { $_->{itemtype} => $_ } @{ $itemtypes->unblessed };
1713
1714     #search item field code
1715     my ($itemtag, undef) = &GetMarcFromKohaField( "items.itemnumber" );
1716
1717     ## find column names of items related to MARC
1718     my %subfieldstosearch;
1719     my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns;
1720     for my $column ( @columns ) {
1721         my ( $tagfield, $tagsubfield ) =
1722           &GetMarcFromKohaField( "items." . $column );
1723         if ( defined $tagsubfield ) {
1724             $subfieldstosearch{$column} = $tagsubfield;
1725         }
1726     }
1727
1728     # handle which records to actually retrieve
1729     my $times;
1730     if ( $hits && $offset + $results_per_page <= $hits ) {
1731         $times = $offset + $results_per_page;
1732     }
1733     else {
1734         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1735     }
1736
1737     my $marcflavour = C4::Context->preference("marcflavour");
1738     # We get the biblionumber position in MARC
1739     my ($bibliotag,$bibliosubf)=GetMarcFromKohaField( 'biblio.biblionumber' );
1740
1741     # set stuff for XSLT processing here once, not later again for every record we retrieved
1742     my $xslfile;
1743     my $xslsyspref;
1744     if( $is_opac ){
1745         $xslsyspref = "OPACXSLTResultsDisplay";
1746         $xslfile = C4::Context->preference( $xslsyspref );
1747     } else {
1748         $xslsyspref = "XSLTResultsDisplay";
1749         $xslfile = C4::Context->preference( $xslsyspref ) || "default";
1750     }
1751     my $lang   = $xslfile ? C4::Languages::getlanguage()  : undef;
1752     my $sysxml = $xslfile ? C4::XSLT::get_xslt_sysprefs() : undef;
1753
1754     my $userenv = C4::Context->userenv;
1755     my $logged_in_user
1756         = ( defined $userenv and $userenv->{number} )
1757         ? Koha::Patrons->find( $userenv->{number} )
1758         : undef;
1759     my $patron_category_hide_lost_items = ($logged_in_user) ? $logged_in_user->category->hidelostitems : 0;
1760
1761     # loop through all of the records we've retrieved
1762     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1763
1764         my $marcrecord;
1765         if ($scan) {
1766             # For Scan searches we built USMARC data
1767             $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i]);
1768         } else {
1769             # Normal search, render from Zebra's output
1770             $marcrecord = new_record_from_zebra(
1771                 'biblioserver',
1772                 $marcresults->[$i]
1773             );
1774
1775             if ( ! defined $marcrecord ) {
1776                 warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i];
1777                 next;
1778             }
1779         }
1780
1781         my $fw = $scan
1782              ? undef
1783              : $bibliotag < 10
1784                ? GetFrameworkCode($marcrecord->field($bibliotag)->data)
1785                : GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));
1786
1787         SetUTF8Flag($marcrecord);
1788         my $oldbiblio = TransformMarcToKoha( $marcrecord, $fw );
1789         $oldbiblio->{result_number} = $i + 1;
1790
1791                 $oldbiblio->{normalized_upc}  = GetNormalizedUPC(       $marcrecord,$marcflavour);
1792                 $oldbiblio->{normalized_ean}  = GetNormalizedEAN(       $marcrecord,$marcflavour);
1793                 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1794                 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1795                 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1796
1797                 # edition information, if any
1798         $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1799
1800         my $itemtype = $oldbiblio->{itemtype} ? $itemtypes{$oldbiblio->{itemtype}} : undef;
1801         # add imageurl to itemtype if there is one
1802         $oldbiblio->{imageurl} = $itemtype ? getitemtypeimagelocation( $search_context->{'interface'}, $itemtype->{imageurl} ) : q{};
1803         # Build summary if there is one (the summary is defined in the itemtypes table)
1804         $oldbiblio->{description} = $itemtype ? $itemtype->{translated_description} : q{};
1805
1806         # FIXME: this is only used in the deprecated non-XLST opac results
1807         if ( !$xslfile && $is_opac && $itemtype && $itemtype->{summary} ) {
1808             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1809             my @fields  = $marcrecord->fields();
1810
1811             my $newsummary;
1812             foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
1813                 my $tags = {};
1814                 foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/ ) {
1815                     $tag =~ /(.{3})(.)/;
1816                     if($marcrecord->field($1)){
1817                         my @abc = $marcrecord->field($1)->subfield($2);
1818                         $tags->{$tag} = $#abc + 1 ;
1819                     }
1820                 }
1821
1822                 # We catch how many times to repeat this line
1823                 my $max = 0;
1824                 foreach my $tag (keys(%$tags)){
1825                     $max = $tags->{$tag} if($tags->{$tag} > $max);
1826                  }
1827
1828                 # we replace, and repeat each line
1829                 for (my $i = 0 ; $i < $max ; $i++){
1830                     my $newline = $line;
1831
1832                     foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
1833                         $tag =~ /(.{3})(.)/;
1834
1835                         if($marcrecord->field($1)){
1836                             my @repl = $marcrecord->field($1)->subfield($2);
1837                             my $subfieldvalue = $repl[$i];
1838                             $newline =~ s/\[$tag\]/$subfieldvalue/g;
1839                         }
1840                     }
1841                     $newsummary .= "$newline\n";
1842                 }
1843             }
1844
1845             $newsummary =~ s/\[(.*?)]//g;
1846             $newsummary =~ s/\n/<br\/>/g;
1847             $oldbiblio->{summary} = $newsummary;
1848         }
1849
1850         # Pull out the items fields
1851         my @fields = $marcrecord->field($itemtag);
1852         my $marcflavor = C4::Context->preference("marcflavour");
1853
1854         # adding linked items that belong to host records
1855         if ( C4::Context->preference('EasyAnalyticalRecords') ) {
1856             my $analyticsfield = '773';
1857             if ($marcflavor eq 'MARC21' || $marcflavor eq 'NORMARC') {
1858                 $analyticsfield = '773';
1859             } elsif ($marcflavor eq 'UNIMARC') {
1860                 $analyticsfield = '461';
1861             }
1862             foreach my $hostfield ( $marcrecord->field($analyticsfield)) {
1863                 my $hostbiblionumber = $hostfield->subfield("0");
1864                 my $linkeditemnumber = $hostfield->subfield("9");
1865                 if( $hostbiblionumber ) {
1866                     my $linkeditemmarc = C4::Items::GetMarcItem( $hostbiblionumber, $linkeditemnumber );
1867                     if ($linkeditemmarc) {
1868                         my $linkeditemfield = $linkeditemmarc->field($itemtag);
1869                         if ($linkeditemfield) {
1870                             push( @fields, $linkeditemfield );
1871                         }
1872                     }
1873                 }
1874             }
1875         }
1876
1877         # Setting item statuses for display
1878         my @available_items_loop;
1879         my @onloan_items_loop;
1880         my @other_items_loop;
1881
1882         my $available_items;
1883         my $onloan_items;
1884         my $other_items;
1885
1886         my $ordered_count         = 0;
1887         my $available_count       = 0;
1888         my $onloan_count          = 0;
1889         my $longoverdue_count     = 0;
1890         my $other_count           = 0;
1891         my $withdrawn_count        = 0;
1892         my $itemlost_count        = 0;
1893         my $hideatopac_count      = 0;
1894         my $itembinding_count     = 0;
1895         my $itemdamaged_count     = 0;
1896         my $item_in_transit_count = 0;
1897         my $can_place_holds       = 0;
1898         my $item_onhold_count     = 0;
1899         my $notforloan_count      = 0;
1900         my $items_count           = scalar(@fields);
1901         my $maxitems_pref = C4::Context->preference('maxItemsinSearchResults');
1902         my $maxitems = $maxitems_pref ? $maxitems_pref - 1 : 1;
1903         my @hiddenitems; # hidden itemnumbers based on OpacHiddenItems syspref
1904
1905         # loop through every item
1906         foreach my $field (@fields) {
1907             my $item;
1908
1909             # populate the items hash
1910             foreach my $code ( keys %subfieldstosearch ) {
1911                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1912             }
1913             $item->{description} = $itemtypes{ $item->{itype} }{translated_description} if $item->{itype};
1914
1915                 # OPAC hidden items
1916             if ($is_opac) {
1917                 # hidden because lost
1918                 if ($hidelostitems && $item->{itemlost}) {
1919                     $hideatopac_count++;
1920                     next;
1921                 }
1922                 # hidden based on OpacHiddenItems syspref
1923                 my @hi = C4::Items::GetHiddenItemnumbers({ items=> [ $item ], borcat => $search_context->{category} });
1924                 if (scalar @hi) {
1925                     push @hiddenitems, @hi;
1926                     $hideatopac_count++;
1927                     next;
1928                 }
1929             }
1930
1931             my $hbranch     = C4::Context->preference('StaffSearchResultsDisplayBranch');
1932             my $otherbranch = $hbranch eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1933
1934             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1935             if ($item->{$hbranch}) {
1936                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1937             }
1938             elsif ($item->{$otherbranch}) {     # Last resort
1939                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1940             }
1941
1942             my $prefix =
1943                 ( $item->{$hbranch} ? $item->{$hbranch} . '--' : q{} )
1944               . ( $item->{location} ? $item->{location} : q{} )
1945               . ( $item->{itype}    ? $item->{itype}    : q{} )
1946               . ( $item->{itemcallnumber} ? $item->{itemcallnumber} : q{} );
1947 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1948             if ( $item->{onloan}
1949                 and $logged_in_user
1950                 and !( $patron_category_hide_lost_items and $item->{itemlost} ) )
1951             {
1952                 $onloan_count++;
1953                 my $key = $prefix . $item->{onloan} . $item->{barcode};
1954                 $onloan_items->{$key}->{due_date} = $item->{onloan};
1955                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1956                 $onloan_items->{$key}->{branchname}     = $item->{branchname};
1957                 $onloan_items->{$key}->{location}       = $shelflocations->{ $item->{location} };
1958                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1959                 $onloan_items->{$key}->{description}    = $item->{description};
1960                 $onloan_items->{$key}->{imageurl} =
1961                   getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype} }->{imageurl} );
1962
1963                 # if something's checked out and lost, mark it as 'long overdue'
1964                 if ( $item->{itemlost} ) {
1965                     $onloan_items->{$key}->{longoverdue}++;
1966                     $longoverdue_count++;
1967                 }
1968                 else {    # can place holds as long as item isn't lost
1969                     $can_place_holds = 1;
1970                 }
1971             }
1972
1973          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1974             else {
1975
1976                 my $itemtype = C4::Context->preference("item-level_itypes")? $item->{itype}: $oldbiblio->{itemtype};
1977                 $item->{notforloan} = 1 if !$item->{notforloan} &&
1978                     $itemtype && $itemtypes{ $itemtype }->{notforloan};
1979
1980                 # item is on order
1981                 if ( $item->{notforloan} < 0 ) {
1982                     $ordered_count++;
1983                 } elsif ( $item->{notforloan} > 0 ) {
1984                     $notforloan_count++;
1985                 }
1986
1987                 # is item in transit?
1988                 my $transfertwhen = '';
1989                 my ($transfertfrom, $transfertto);
1990
1991                 # is item on the reserve shelf?
1992                 my $reservestatus = '';
1993
1994                 unless ($item->{withdrawn}
1995                         || $item->{itemlost}
1996                         || $item->{damaged}
1997                         || $item->{notforloan}
1998                         || ( C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck')
1999                         && $items_count > C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck') ) ) {
2000
2001                     # A couple heuristics to limit how many times
2002                     # we query the database for item transfer information, sacrificing
2003                     # accuracy in some cases for speed;
2004                     #
2005                     # 1. don't query if item has one of the other statuses
2006                     # 2. don't check transit status if the bib has
2007                     #    more than 20 items
2008                     #
2009                     # FIXME: to avoid having the query the database like this, and to make
2010                     #        the in transit status count as unavailable for search limiting,
2011                     #        should map transit status to record indexed in Zebra.
2012                     #
2013                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
2014                     $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} );
2015                 }
2016
2017                 # item is withdrawn, lost, damaged, not for loan, reserved or in transit
2018                 if (   $item->{withdrawn}
2019                     || $item->{itemlost}
2020                     || $item->{damaged}
2021                     || $item->{notforloan}
2022                     || $reservestatus eq 'Waiting'
2023                     || ($transfertwhen && $transfertwhen ne ''))
2024                 {
2025                     $withdrawn_count++        if $item->{withdrawn};
2026                     $itemlost_count++        if $item->{itemlost};
2027                     $itemdamaged_count++     if $item->{damaged};
2028                     $item_in_transit_count++ if $transfertwhen && $transfertwhen ne '';
2029                     $item_onhold_count++     if $reservestatus eq 'Waiting';
2030                     $item->{status} = ($item->{withdrawn}//q{}) . "-" . ($item->{itemlost}//q{}) . "-" . ($item->{damaged}//q{}) . "-" . ($item->{notforloan}//q{});
2031
2032                     # can place a hold on a item if
2033                     # not lost nor withdrawn
2034                     # not damaged unless AllowHoldsOnDamagedItems is true
2035                     # item is either for loan or on order (notforloan < 0)
2036                     $can_place_holds = 1
2037                       if (
2038                            !$item->{itemlost}
2039                         && !$item->{withdrawn}
2040                         && ( !$item->{damaged} || C4::Context->preference('AllowHoldsOnDamagedItems') )
2041                         && ( !$item->{notforloan} || $item->{notforloan} < 0 )
2042                       );
2043
2044                     $other_count++;
2045
2046                     my $key = $prefix . $item->{status};
2047                     foreach (qw(withdrawn itemlost damaged branchname itemcallnumber)) {
2048                         $other_items->{$key}->{$_} = $item->{$_};
2049                     }
2050                     $other_items->{$key}->{intransit} = ( $transfertwhen ne '' ) ? 1 : 0;
2051                     $other_items->{$key}->{onhold} = ($reservestatus) ? 1 : 0;
2052                     $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value and $item->{notforloan};
2053                     $other_items->{$key}->{count}++ if $item->{$hbranch};
2054                     $other_items->{$key}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
2055                     $other_items->{$key}->{description} = $item->{description};
2056                     $other_items->{$key}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
2057                 }
2058                 # item is available
2059                 else {
2060                     $can_place_holds = 1;
2061                     $available_count++;
2062                     $available_items->{$prefix}->{count}++ if $item->{$hbranch};
2063                     foreach (qw(branchname itemcallnumber description)) {
2064                         $available_items->{$prefix}->{$_} = $item->{$_};
2065                     }
2066                     $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
2067                     $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
2068                 }
2069             }
2070         }    # notforloan, item level and biblioitem level
2071
2072         # if all items are hidden, do not show the record
2073         if ($items_count > 0 && $hideatopac_count == $items_count) {
2074             next;
2075         }
2076
2077         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
2078         for my $key ( sort keys %$onloan_items ) {
2079             (++$onloanitemscount > $maxitems) and last;
2080             push @onloan_items_loop, $onloan_items->{$key};
2081         }
2082         for my $key ( sort keys %$other_items ) {
2083             (++$otheritemscount > $maxitems) and last;
2084             push @other_items_loop, $other_items->{$key};
2085         }
2086         for my $key ( sort keys %$available_items ) {
2087             (++$availableitemscount > $maxitems) and last;
2088             push @available_items_loop, $available_items->{$key}
2089         }
2090
2091         # XSLT processing of some stuff
2092         # we fetched the sysprefs already before the loop through all retrieved record!
2093         if (!$scan && $xslfile) {
2094             $record_processor->options({
2095                 frameworkcode => $fw,
2096                 interface     => $search_context->{'interface'}
2097             });
2098
2099             $record_processor->process($marcrecord);
2100             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $xslsyspref, 1, \@hiddenitems, $sysxml, $xslfile, $lang, $xslt_variables);
2101         }
2102
2103         # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either
2104         if (!C4::Context->preference("item-level_itypes")) {
2105             if ($itemtype && $itemtype->{notforloan}) {
2106                 $can_place_holds = 0;
2107             }
2108         }
2109         $oldbiblio->{norequests} = 1 unless $can_place_holds;
2110         $oldbiblio->{items_count}          = $items_count;
2111         $oldbiblio->{available_items_loop} = \@available_items_loop;
2112         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
2113         $oldbiblio->{other_items_loop}     = \@other_items_loop;
2114         $oldbiblio->{availablecount}       = $available_count;
2115         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
2116         $oldbiblio->{onloancount}          = $onloan_count;
2117         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
2118         $oldbiblio->{othercount}           = $other_count;
2119         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
2120         $oldbiblio->{withdrawncount}        = $withdrawn_count;
2121         $oldbiblio->{itemlostcount}        = $itemlost_count;
2122         $oldbiblio->{damagedcount}         = $itemdamaged_count;
2123         $oldbiblio->{intransitcount}       = $item_in_transit_count;
2124         $oldbiblio->{onholdcount}          = $item_onhold_count;
2125         $oldbiblio->{orderedcount}         = $ordered_count;
2126         $oldbiblio->{notforloancount}      = $notforloan_count;
2127
2128         if (C4::Context->preference("AlternateHoldingsField") && $items_count == 0) {
2129             my $fieldspec = C4::Context->preference("AlternateHoldingsField");
2130             my $subfields = substr $fieldspec, 3;
2131             my $holdingsep = C4::Context->preference("AlternateHoldingsSeparator") || ' ';
2132             my @alternateholdingsinfo = ();
2133             my @holdingsfields = $marcrecord->field(substr $fieldspec, 0, 3);
2134             my $alternateholdingscount = 0;
2135
2136             for my $field (@holdingsfields) {
2137                 my %holding = ( holding => '' );
2138                 my $havesubfield = 0;
2139                 for my $subfield ($field->subfields()) {
2140                     if ((index $subfields, $$subfield[0]) >= 0) {
2141                         $holding{'holding'} .= $holdingsep if (length $holding{'holding'} > 0);
2142                         $holding{'holding'} .= $$subfield[1];
2143                         $havesubfield++;
2144                     }
2145                 }
2146                 if ($havesubfield) {
2147                     push(@alternateholdingsinfo, \%holding);
2148                     $alternateholdingscount++;
2149                 }
2150             }
2151
2152             $oldbiblio->{'ALTERNATEHOLDINGS'} = \@alternateholdingsinfo;
2153             $oldbiblio->{'alternateholdings_count'} = $alternateholdingscount;
2154         }
2155
2156         $oldbiblio->{biblio_object} = Koha::Biblios->find( $oldbiblio->{biblionumber} );
2157
2158         push( @newresults, $oldbiblio );
2159     }
2160
2161     return @newresults;
2162 }
2163
2164 =head2 enabled_staff_search_views
2165
2166 %hash = enabled_staff_search_views()
2167
2168 This function returns a hash that contains three flags obtained from the system
2169 preferences, used to determine whether a particular staff search results view
2170 is enabled.
2171
2172 =over 2
2173
2174 =item C<Output arg:>
2175
2176     * $hash{can_view_MARC} is true only if the MARC view is enabled
2177     * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2178     * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2179
2180 =item C<usage in the script:>
2181
2182 =back
2183
2184 $template->param ( C4::Search::enabled_staff_search_views );
2185
2186 =cut
2187
2188 sub enabled_staff_search_views
2189 {
2190         return (
2191                 can_view_MARC                   => C4::Context->preference('viewMARC'),                 # 1 if the staff search allows the MARC view
2192                 can_view_ISBD                   => C4::Context->preference('viewISBD'),                 # 1 if the staff search allows the ISBD view
2193                 can_view_labeledMARC    => C4::Context->preference('viewLabeledMARC'),  # 1 if the staff search allows the Labeled MARC view
2194         );
2195 }
2196
2197 =head2 z3950_search_args
2198
2199 $arrayref = z3950_search_args($matchpoints)
2200
2201 This function returns an array reference that contains the search parameters to be
2202 passed to the Z39.50 search script (z3950_search.pl). The array elements
2203 are hash refs whose keys are name and value, and whose values are the
2204 name of a search parameter, the value of that search parameter and the URL encoded
2205 value of that parameter.
2206
2207 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2208
2209 The search parameter values are obtained from the bibliographic record whose
2210 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2211
2212 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2213 a general purpose search argument. In this case, the returned array contains only
2214 entry: the key is 'title' and the value is derived from $matchpoints.
2215
2216 If a search parameter value is undefined or empty, it is not included in the returned
2217 array.
2218
2219 The returned array reference may be passed directly to the template parameters.
2220
2221 =over 2
2222
2223 =item C<Output arg:>
2224
2225     * $array containing hash refs as described above
2226
2227 =item C<usage in the script:>
2228
2229 =back
2230
2231 $data = Biblio::GetBiblioData($bibno);
2232 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2233
2234 *OR*
2235
2236 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2237
2238 =cut
2239
2240 sub z3950_search_args {
2241     my $bibrec = shift;
2242
2243     my $isbn_string = ref( $bibrec ) ? $bibrec->{title} : $bibrec;
2244     my $isbn = Business::ISBN->new( $isbn_string );
2245
2246     if (defined $isbn && $isbn->is_valid)
2247     {
2248         if ( ref($bibrec) ) {
2249             $bibrec->{isbn} = $isbn_string;
2250             $bibrec->{title} = undef;
2251         } else {
2252             $bibrec = { isbn => $isbn_string };
2253         }
2254     }
2255     else {
2256         $bibrec = { title => $bibrec } if !ref $bibrec;
2257     }
2258     my $array = [];
2259     for my $field (qw/ lccn isbn issn title author dewey subject /)
2260     {
2261         push @$array, { name => $field, value => $bibrec->{$field} }
2262           if defined $bibrec->{$field};
2263     }
2264     return $array;
2265 }
2266
2267 =head2 GetDistinctValues($field);
2268
2269 C<$field> is a reference to the fields array
2270
2271 =cut
2272
2273 sub GetDistinctValues {
2274     my ($fieldname,$string)=@_;
2275     # returns a reference to a hash of references to branches...
2276     if ($fieldname=~/\./){
2277                         my ($table,$column)=split /\./, $fieldname;
2278                         my $dbh = C4::Context->dbh;
2279                         warn "select DISTINCT($column) as value, count(*) as cnt from $table group by lib order by $column " if $DEBUG;
2280                         my $sth = $dbh->prepare("select DISTINCT($column) as value, count(*) as cnt from $table ".($string?" where $column like \"$string%\"":"")."group by value order by $column ");
2281                         $sth->execute;
2282                         my $elements=$sth->fetchall_arrayref({});
2283                         return $elements;
2284    }
2285    else {
2286                 $string||= qq("");
2287                 my @servers=qw<biblioserver authorityserver>;
2288                 my (@zconns,@results);
2289         for ( my $i = 0 ; $i < @servers ; $i++ ) {
2290                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
2291                         $results[$i] =
2292                       $zconns[$i]->scan(
2293                         ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i])
2294                       );
2295                 }
2296                 # The big moment: asynchronously retrieve results from all servers
2297                 my @elements;
2298         _ZOOM_event_loop(
2299             \@zconns,
2300             \@results,
2301             sub {
2302                 my ( $i, $size ) = @_;
2303                 for ( my $j = 0 ; $j < $size ; $j++ ) {
2304                     my %hashscan;
2305                     @hashscan{qw(value cnt)} =
2306                       $results[ $i - 1 ]->display_term($j);
2307                     push @elements, \%hashscan;
2308                 }
2309             }
2310         );
2311                 return \@elements;
2312    }
2313 }
2314
2315 =head2 _ZOOM_event_loop
2316
2317     _ZOOM_event_loop(\@zconns, \@results, sub {
2318         my ( $i, $size ) = @_;
2319         ....
2320     } );
2321
2322 Processes a ZOOM event loop and passes control to a closure for
2323 processing the results, and destroying the resultsets.
2324
2325 =cut
2326
2327 sub _ZOOM_event_loop {
2328     my ($zconns, $results, $callback) = @_;
2329     while ( ( my $i = ZOOM::event( $zconns ) ) != 0 ) {
2330         my $ev = $zconns->[ $i - 1 ]->last_event();
2331         if ( $ev == ZOOM::Event::ZEND ) {
2332             next unless $results->[ $i - 1 ];
2333             my $size = $results->[ $i - 1 ]->size();
2334             if ( $size > 0 ) {
2335                 $callback->($i, $size);
2336             }
2337         }
2338     }
2339
2340     foreach my $result (@$results) {
2341         $result->destroy();
2342     }
2343 }
2344
2345 =head2 new_record_from_zebra
2346
2347 Given raw data from a searchengine result set, return a MARC::Record object
2348
2349 This helper function is needed to take into account all the involved
2350 system preferences and configuration variables to properly create the
2351 MARC::Record object.
2352
2353 If we are using GRS-1, then the raw data we get from Zebra should be USMARC
2354 data. If we are using DOM, then it has to be MARCXML.
2355
2356 If we are using elasticsearch, it'll already be a MARC::Record and this
2357 function needs a new name.
2358
2359 =cut
2360
2361 sub new_record_from_zebra {
2362
2363     my $server   = shift;
2364     my $raw_data = shift;
2365     # Set the default indexing modes
2366     my $search_engine = C4::Context->preference("SearchEngine");
2367     if ($search_engine eq 'Elasticsearch') {
2368         return ref $raw_data eq 'MARC::Record' ? $raw_data : MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
2369     }
2370     my $index_mode = ( $server eq 'biblioserver' )
2371                         ? C4::Context->config('zebra_bib_index_mode') // 'dom'
2372                         : C4::Context->config('zebra_auth_index_mode') // 'dom';
2373
2374     my $marc_record =  eval {
2375         if ( $index_mode eq 'dom' ) {
2376             MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
2377         } else {
2378             MARC::Record->new_from_usmarc( $raw_data );
2379         }
2380     };
2381
2382     if ($@) {
2383         return;
2384     } else {
2385         return $marc_record;
2386     }
2387
2388 }
2389
2390 END { }    # module clean-up code here (global destructor)
2391
2392 1;
2393 __END__
2394
2395 =head1 AUTHOR
2396
2397 Koha Development Team <http://koha-community.org/>
2398
2399 =cut