C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  17
  18 use Modern::Perl;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  22 use C4::Koha;      # getFacets
  23 use Koha::DateUtils;
  24 use Koha::Libraries;
  25 use Lingua::Stem;
  26 use C4::Search::PazPar2;
  27 use XML::Simple;
  28 use C4::XSLT;
  29 use C4::Reserves;    # GetReserveStatus
  30 use C4::Debug;
  31 use C4::Charset;
  32 use Koha::AuthorisedValues;
  33 use Koha::ItemTypes;
  34 use Koha::Libraries;
  35 use Koha::Patrons;
  36 use Koha::RecordProcessor;
  37 use YAML;
  38 use URI::Escape;
  39 use Business::ISBN;
  40 use MARC::Record;
  41 use MARC::Field;
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  43
  44 BEGIN {
  45     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  46 }
  47
  48 =head1 NAME
  49
  50 C4::Search - Functions for searching the Koha catalog.
  51
  52 =head1 SYNOPSIS
  53
  54 See opac/opac-search.pl or catalogue/search.pl for example of usage
  55
  56 =head1 DESCRIPTION
  57
  58 This module provides searching functions for Koha's bibliographic databases
  59
  60 =head1 FUNCTIONS
  61
  62 =cut
  63
  64 @ISA    = qw(Exporter);
  65 @EXPORT = qw(
  66   &FindDuplicate
  67   &SimpleSearch
  68   &searchResults
  69   &getRecords
  70   &buildQuery
  71   &GetDistinctValues
  72   &enabled_staff_search_views
  73 );
  74
  75 # make all your functions, whether exported or not;
  76
  77 =head2 FindDuplicate
  78
   79 ($biblionumber, $title) = FindDuplicate($record);
  80
  81 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  82
  83 =cut
  84
  85 sub FindDuplicate {
  86     my ($record) = @_;
  87     my $dbh = C4::Context->dbh;
  88     my $result = TransformMarcToKoha( $record, '' );
  89     my $sth;
  90     my $query;
  91
  92     # search duplicate on ISBN, easy and fast..
  93     # ... normalize first
  94     if ( $result->{isbn} ) {
  95         $result->{isbn} =~ s/\(.*$//;
  96         $result->{isbn} =~ s/\s+$//;
  97         $query = "isbn:$result->{isbn}";
  98     }
  99     else {
 100
 101         my $titleindex = 'ti,ext';
 102         my $authorindex = 'au,ext';
 103         my $op = 'and';
 104
 105         $result->{title} =~ s /\\//g;
 106         $result->{title} =~ s /\"//g;
 107         $result->{title} =~ s /\(//g;
 108         $result->{title} =~ s /\)//g;
 109
 110         $query = "$titleindex:\"$result->{title}\"";
 111         if   ( $result->{author} ) {
 112             $result->{author} =~ s /\\//g;
 113             $result->{author} =~ s /\"//g;
 114             $result->{author} =~ s /\(//g;
 115             $result->{author} =~ s /\)//g;
 116
 117             $query .= " $op $authorindex:\"$result->{author}\"";
 118         }
 119     }
 120
 121     my ( $error, $searchresults, undef ) = SimpleSearch($query); # FIXME :: hardcoded !
 122     my @results;
 123     if (!defined $error) {
 124         foreach my $possible_duplicate_record (@{$searchresults}) {
 125             my $marcrecord = new_record_from_zebra(
 126                 'biblioserver',
 127                 $possible_duplicate_record
 128             );
 129
 130             my $result = TransformMarcToKoha( $marcrecord, '' );
 131
 132             # FIXME :: why 2 $biblionumber ?
 133             if ($result) {
 134                 push @results, $result->{'biblionumber'};
 135                 push @results, $result->{'title'};
 136             }
 137         }
 138     }
 139     return @results;
 140 }
 141
 142 =head2 SimpleSearch
 143
 144 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] );
 145
 146 This function provides a simple search API on the bibliographic catalog
 147
 148 =over 2
 149
 150 =item C<input arg:>
 151
 152     * $query can be a simple keyword or a complete CCL query
 153     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 154     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 155     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 156     * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = )
 157
 158
 159 =item C<Return:>
 160
 161     Returns an array consisting of three elements
 162     * $error is undefined unless an error is detected
 163     * $results is a reference to an array of records.
 164     * $total_hits is the number of hits that would have been returned with no limit
 165
 166     If an error is returned the two other return elements are undefined. If error itself is undefined
 167     the other two elements are always defined
 168
 169 =item C<usage in the script:>
 170
 171 =back
 172
 173 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 174
 175 if (defined $error) {
 176     $template->param(query_error => $error);
 177     warn "error: ".$error;
 178     output_html_with_http_headers $input, $cookie, $template->output;
 179     exit;
 180 }
 181
 182 my $hits = @{$marcresults};
 183 my @results;
 184
 185 for my $r ( @{$marcresults} ) {
 186     my $marcrecord = MARC::File::USMARC::decode($r);
 187     my $biblio = TransformMarcToKoha($marcrecord,q{});
 188
  189     #build the array of hashes for the template.
 190     push @results, {
 191         title           => $biblio->{'title'},
 192         subtitle        => $biblio->{'subtitle'},
 193         biblionumber    => $biblio->{'biblionumber'},
 194         author          => $biblio->{'author'},
 195         publishercode   => $biblio->{'publishercode'},
 196         publicationyear => $biblio->{'publicationyear'},
 197         };
 198
 199 }
 200
 201 $template->param(result=>\@results);
 202
 203 =cut
 204
 205 sub SimpleSearch {
 206     my ( $query, $offset, $max_results, $servers, %options )  = @_;
 207
 208     return ( 'No query entered', undef, undef ) unless $query;
 209     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 210     my @servers = defined ( $servers ) ? @$servers : ( 'biblioserver' );
 211     my @zoom_queries;
 212     my @tmpresults;
 213     my @zconns;
 214     my $results = [];
 215     my $total_hits = 0;
 216
 217     # Initialize & Search Zebra
 218     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 219         eval {
 220             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 221             $query =~ s/:/=/g unless $options{skip_normalize};
 222             $zoom_queries[$i] = ZOOM::Query::CCL2RPN->new( $query, $zconns[$i]);
 223             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 224
 225             # error handling
 226             my $error =
 227                 $zconns[$i]->errmsg() . " ("
 228               . $zconns[$i]->errcode() . ") "
 229               . $zconns[$i]->addinfo() . " "
 230               . $zconns[$i]->diagset();
 231
 232             return ( $error, undef, undef ) if $zconns[$i]->errcode();
 233         };
 234         if ($@) {
 235
 236             # caught a ZOOM::Exception
 237             my $error =
 238                 $@->message() . " ("
 239               . $@->code() . ") "
 240               . $@->addinfo() . " "
 241               . $@->diagset();
 242             warn $error." for query: $query";
 243             return ( $error, undef, undef );
 244         }
 245     }
 246
 247     _ZOOM_event_loop(
 248         \@zconns,
 249         \@tmpresults,
 250         sub {
 251             my ($i, $size) = @_;
 252             my $first_record = defined($offset) ? $offset + 1 : 1;
 253             my $hits = $tmpresults[ $i - 1 ]->size();
 254             $total_hits += $hits;
 255             my $last_record = $hits;
 256             if ( defined $max_results && $offset + $max_results < $hits ) {
 257                 $last_record = $offset + $max_results;
 258             }
 259
 260             for my $j ( $first_record .. $last_record ) {
 261                 my $record = eval {
 262                   $tmpresults[ $i - 1 ]->record( $j - 1 )->raw()
 263                   ;    # 0 indexed
 264                 };
 265                 push @{$results}, $record if defined $record;
 266             }
 267         }
 268     );
 269
 270     foreach my $zoom_query (@zoom_queries) {
 271         $zoom_query->destroy();
 272     }
 273
 274     return ( undef, $results, $total_hits );
 275 }
 276
 277 =head2 getRecords
 278
 279 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 280
 281         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 282         $results_per_page, $offset,       $branches,       $itemtypes,
 283         $query_type,       $scan,         $opac
 284     );
 285
 286 The all singing, all dancing, multi-server, asynchronous, scanning,
 287 searching, record nabbing, facet-building
 288
 289 See verbose embedded documentation.
 290
 291 =cut
 292
 293 sub getRecords {
 294     my (
 295         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 296         $results_per_page, $offset,       $branches,         $itemtypes,
 297         $query_type,       $scan,         $opac
 298     ) = @_;
 299
 300     my @servers = @$servers_ref;
 301     my @sort_by = @$sort_by_ref;
 302     $offset = 0 if $offset < 0;
 303
 304     # Initialize variables for the ZOOM connection and results object
 305     my @zconns;
 306     my @results;
 307     my $results_hashref = ();
 308
 309     # TODO simplify this structure ( { branchcode => $branchname } is enought) and remove this parameter
 310     $branches ||= { map { $_->branchcode => { branchname => $_->branchname } } Koha::Libraries->search };
 311
 312     # Initialize variables for the faceted results objects
 313     my $facets_counter = {};
 314     my $facets_info    = {};
 315     my $facets         = getFacets();
 316
 317     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 318
 319     ### LOOP THROUGH THE SERVERS
 320     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 321         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 322
 323 # perform the search, create the results objects
 324 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 325         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 326
 327         #$query_to_use = $simple_query if $scan;
 328         warn $simple_query if ( $scan and $DEBUG );
 329
 330         # Check if we've got a query_type defined, if so, use it
 331         eval {
 332             if ($query_type) {
 333                 if ($query_type =~ /^ccl/) {
 334                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 335                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 336                 } elsif ($query_type =~ /^cql/) {
 337                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CQL->new($query_to_use, $zconns[$i]));
 338                 } elsif ($query_type =~ /^pqf/) {
 339                     $results[$i] = $zconns[$i]->search(ZOOM::Query::PQF->new($query_to_use, $zconns[$i]));
 340                 } else {
 341                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 342                 }
 343             } elsif ($scan) {
 344                     $results[$i] = $zconns[$i]->scan(  ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 345             } else {
 346                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 347             }
 348         };
 349         if ($@) {
 350             warn "WARNING: query problem with $query_to_use " . $@;
 351         }
 352
 353         # Concatenate the sort_by limits and pass them to the results object
 354         # Note: sort will override rank
 355         my $sort_by;
 356         foreach my $sort (@sort_by) {
 357             if ( $sort eq "author_az" || $sort eq "author_asc" ) {
 358                 $sort_by .= "1=1003 <i ";
 359             }
 360             elsif ( $sort eq "author_za" || $sort eq "author_dsc" ) {
 361                 $sort_by .= "1=1003 >i ";
 362             }
 363             elsif ( $sort eq "popularity_asc" ) {
 364                 $sort_by .= "1=9003 <i ";
 365             }
 366             elsif ( $sort eq "popularity_dsc" ) {
 367                 $sort_by .= "1=9003 >i ";
 368             }
 369             elsif ( $sort eq "call_number_asc" ) {
 370                 $sort_by .= "1=8007  <i ";
 371             }
 372             elsif ( $sort eq "call_number_dsc" ) {
 373                 $sort_by .= "1=8007 >i ";
 374             }
 375             elsif ( $sort eq "pubdate_asc" ) {
 376                 $sort_by .= "1=31 <i ";
 377             }
 378             elsif ( $sort eq "pubdate_dsc" ) {
 379                 $sort_by .= "1=31 >i ";
 380             }
 381             elsif ( $sort eq "acqdate_asc" ) {
 382                 $sort_by .= "1=32 <i ";
 383             }
 384             elsif ( $sort eq "acqdate_dsc" ) {
 385                 $sort_by .= "1=32 >i ";
 386             }
 387             elsif ( $sort eq "title_az" || $sort eq "title_asc" ) {
 388                 $sort_by .= "1=4 <i ";
 389             }
 390             elsif ( $sort eq "title_za" || $sort eq "title_dsc" ) {
 391                 $sort_by .= "1=4 >i ";
 392             }
 393             else {
 394                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 395             }
 396         }
 397         if ( $sort_by && !$scan && $results[$i] ) {
 398             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 399                 warn "WARNING sort $sort_by failed";
 400             }
 401         }
 402     }    # finished looping through servers
 403
 404     # The big moment: asynchronously retrieve results from all servers
 405         _ZOOM_event_loop(
 406             \@zconns,
 407             \@results,
 408             sub {
 409                 my ( $i, $size ) = @_;
 410                 my $results_hash;
 411
 412                 # loop through the results
 413                 $results_hash->{'hits'} = $size;
 414                 my $times;
 415                 if ( $offset + $results_per_page <= $size ) {
 416                     $times = $offset + $results_per_page;
 417                 }
 418                 else {
 419                     $times = $size;
 420                 }
 421
 422                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 423                     my $record;
 424
 425                     ## Check if it's an index scan
 426                     if ($scan) {
 427                         my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j);
 428
 429                  # here we create a minimal MARC record and hand it off to the
 430                  # template just like a normal result ... perhaps not ideal, but
 431                  # it works for now
 432                         my $tmprecord = MARC::Record->new();
 433                         $tmprecord->encoding('UTF-8');
 434                         my $tmptitle;
 435                         my $tmpauthor;
 436
 437                 # the minimal record in author/title (depending on MARC flavour)
 438                         if ( C4::Context->preference("marcflavour") eq
 439                             "UNIMARC" )
 440                         {
 441                             $tmptitle = MARC::Field->new(
 442                                 '200', ' ', ' ',
 443                                 a => $term,
 444                                 f => $occ
 445                             );
 446                             $tmprecord->append_fields($tmptitle);
 447                         }
 448                         else {
 449                             $tmptitle =
 450                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 451                             $tmpauthor =
 452                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 453                             $tmprecord->append_fields($tmptitle);
 454                             $tmprecord->append_fields($tmpauthor);
 455                         }
 456                         $results_hash->{'RECORDS'}[$j] =
 457                           $tmprecord->as_usmarc();
 458                     }
 459
 460                     # not an index scan
 461                     else {
 462                         $record = $results[ $i - 1 ]->record($j)->raw();
 463                         # warn "RECORD $j:".$record;
 464                         $results_hash->{'RECORDS'}[$j] = $record;
 465                     }
 466
 467                 }
 468                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 469
 470                 # Fill the facets while we're looping, but only for the
 471                 # biblioserver and not for a scan
 472                 if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
 473                     $facets_counter = GetFacets( $results[ $i - 1 ] );
 474                     $facets_info    = _get_facets_info( $facets );
 475                 }
 476
 477                 # BUILD FACETS
 478                 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 479                     for my $link_value (
 480                         sort { $a cmp $b } keys %$facets_counter
 481                       )
 482                     {
 483                         my @this_facets_array;
 484                         for my $one_facet (
 485                             sort {
 486                                 $facets_counter->{$link_value}
 487                                   ->{$b} <=> $facets_counter->{$link_value}
 488                                   ->{$a}
 489                             } keys %{ $facets_counter->{$link_value} }
 490                           )
 491                         {
 492 # Sanitize the link value : parenthesis, question and exclamation mark will cause errors with CCL
 493                             my $facet_link_value = $one_facet;
 494                             $facet_link_value =~ s/[()!?¡¿؟]/ /g;
 495
 496                             # fix the length that will display in the label,
 497                             my $facet_label_value = $one_facet;
 498                             my $facet_max_length  = C4::Context->preference(
 499                                 'FacetLabelTruncationLength')
 500                               || 20;
 501                             $facet_label_value =
 502                               substr( $one_facet, 0, $facet_max_length )
 503                               . "..."
 504                               if length($facet_label_value) >
 505                                   $facet_max_length;
 506
 507                         # if it's a branch, label by the name, not the code,
 508                             if ( $link_value =~ /branch/ ) {
 509                                 if (   defined $branches
 510                                     && ref($branches) eq "HASH"
 511                                     && defined $branches->{$one_facet}
 512                                     && ref( $branches->{$one_facet} ) eq
 513                                     "HASH" )
 514                                 {
 515                                     $facet_label_value =
 516                                       $branches->{$one_facet}
 517                                       ->{'branchname'};
 518                                 }
 519                                 else {
 520                                     $facet_label_value = "*";
 521                                 }
 522                             }
 523
 524                       # if it's a itemtype, label by the name, not the code,
 525                             if ( $link_value =~ /itype/ ) {
 526                                 if (   defined $itemtypes
 527                                     && ref($itemtypes) eq "HASH"
 528                                     && defined $itemtypes->{$one_facet}
 529                                     && ref( $itemtypes->{$one_facet} ) eq
 530                                     "HASH" )
 531                                 {
 532                                     $facet_label_value =
 533                                       $itemtypes->{$one_facet}
 534                                       ->{translated_description};
 535                                 }
 536                             }
 537
 538            # also, if it's a location code, use the name instead of the code
 539                             if ( $link_value =~ /location/ ) {
 540                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 541                                 my $av = Koha::AuthorisedValues->search({ category => 'LOC', authorised_value => $one_facet });
 542                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 543                             }
 544
 545                             # also, if it's a collection code, use the name instead of the code
 546                             if ( $link_value =~ /ccode/ ) {
 547                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 548                                 my $av = Koha::AuthorisedValues->search({ category => 'CCODE', authorised_value => $one_facet });
 549                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 550                             }
 551
 552             # but we're down with the whole label being in the link's title.
 553                             push @this_facets_array,
 554                               {
 555                                 facet_count =>
 556                                   $facets_counter->{$link_value}
 557                                   ->{$one_facet},
 558                                 facet_label_value => $facet_label_value,
 559                                 facet_title_value => $one_facet,
 560                                 facet_link_value  => $facet_link_value,
 561                                 type_link_value   => $link_value,
 562                               }
 563                               if ($facet_label_value);
 564                         }
 565
 566                         push @facets_loop,
 567                           {
 568                             type_link_value => $link_value,
 569                             type_id         => $link_value . "_id",
 570                             "type_label_"
 571                               . $facets_info->{$link_value}->{'label_value'} =>
 572                               1,
 573                             facets     => \@this_facets_array,
 574                           }
 575                           unless (
 576                             (
 577                                 $facets_info->{$link_value}->{'label_value'} =~
 578                                 /Libraries/
 579                             )
 580                             and ( Koha::Libraries->search->count == 1 )
 581                           );
 582                     }
 583                 }
 584             }
 585         );
 586
 587     # This sorts the facets into alphabetical order
 588     if (@facets_loop) {
 589         foreach my $f (@facets_loop) {
 590             $f->{facets} = [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ];
 591         }
 592     }
 593
 594     return ( undef, $results_hashref, \@facets_loop );
 595 }
 596
 597 sub GetFacets {
 598
 599     my $rs = shift;
 600     my $facets;
 601
 602     my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0;
 603
 604     if ( $use_zebra_facets ) {
 605         $facets = _get_facets_from_zebra( $rs );
 606     } else {
 607         $facets = _get_facets_from_records( $rs );
 608     }
 609
 610     return $facets;
 611 }
 612
 613 sub _get_facets_from_records {
 614
 615     my $rs = shift;
 616
 617     my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20;
 618     my $facets_config  = getFacets();
 619     my $facets         = {};
 620     my $size           = $rs->size();
 621     my $jmax           = $size > $facets_maxrecs
 622                             ? $facets_maxrecs
 623                             : $size;
 624
 625     for ( my $j = 0 ; $j < $jmax ; $j++ ) {
 626
 627         my $marc_record = new_record_from_zebra (
 628                 'biblioserver',
 629                 $rs->record( $j )->raw()
 630         );
 631
 632         if ( ! defined $marc_record ) {
 633             warn "ERROR DECODING RECORD - $@: " .
 634                 $rs->record( $j )->raw();
 635             next;
 636         }
 637
 638         _get_facets_data_from_record( $marc_record, $facets_config, $facets );
 639     }
 640
 641     return $facets;
 642 }
 643
 644 =head2 _get_facets_data_from_record
 645
 646     C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
 647
 648 Internal function that extracts facets information from a MARC::Record object
 649 and populates $facets_counter for using in getRecords.
 650
 651 $facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured
 652 facets for Zebra).
 653
 654 =cut
 655
 656 sub _get_facets_data_from_record {
 657
 658     my ( $marc_record, $facets, $facets_counter ) = @_;
 659
 660     for my $facet (@$facets) {
 661
 662         my @used_datas = ();
 663
 664         foreach my $tag ( @{ $facet->{ tags } } ) {
 665
 666             # tag number is the first three digits
 667             my $tag_num          = substr( $tag, 0, 3 );
 668             # subfields are the remainder
 669             my $subfield_letters = substr( $tag, 3 );
 670
 671             my @fields = $marc_record->field( $tag_num );
 672             foreach my $field (@fields) {
 673                 # If $field->indicator(1) eq 'z', it means it is a 'see from'
 674                 # field introduced because of IncludeSeeFromInSearches, so skip it
 675                 next if $field->indicator(1) eq 'z';
 676
 677                 my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
 678                 $data =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 679
 680                 unless ( grep { $_ eq $data } @used_datas ) {
 681                     push @used_datas, $data;
 682                     $facets_counter->{ $facet->{ idx } }->{ $data }++;
 683                 }
 684             }
 685         }
 686     }
 687 }
 688
 689 =head2 _get_facets_from_zebra
 690
 691     my $facets = _get_facets_from_zebra( $result_set )
 692
 693 Retrieves facets for a specified result set. It loops through the facets defined
 694 in C4::Koha::getFacets and returns a hash with the following structure:
 695
 696    {  facet_idx => {
 697             facet_value => count
 698       },
 699       ...
 700    }
 701
 702 =cut
 703
 704 sub _get_facets_from_zebra {
 705
 706     my $rs = shift;
 707
 708     # save current elementSetName
 709     my $elementSetName = $rs->option( 'elementSetName' );
 710
 711     my $facets_loop = getFacets();
 712     my $facets_data  = {};
 713     # loop through defined facets and fill the facets hashref
 714     foreach my $facet ( @$facets_loop ) {
 715
 716         my $idx = $facet->{ idx };
 717         my $sep = $facet->{ sep };
 718         my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep );
 719         if ( $facet_values ) {
 720             # we've actually got a result
 721             $facets_data->{ $idx } = $facet_values;
 722         }
 723     }
 724     # set elementSetName to its previous value to avoid side effects
 725     $rs->option( elementSetName => $elementSetName );
 726
 727     return $facets_data;
 728 }
 729
 730 =head2 _get_facet_from_result_set
 731
 732     my $facet_values =
 733         C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
 734
 735 Internal function that extracts facet information for a specific index ($facet_idx) and
 736 returns a hash containing facet values and count:
 737
 738     {
 739         $facet_value => $count ,
 740         ...
 741     }
 742
 743 Warning: this function has the side effect of changing the elementSetName for the result
 744 set. It is a helper function for the main loop, which takes care of backing it up for
 745 restoring.
 746
 747 =cut
 748
 749 sub _get_facet_from_result_set {
 750
 751     my $facet_idx = shift;
 752     my $rs        = shift;
 753     my $sep       = shift;
 754
 755     my $internal_sep  = '<*>';
 756     my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
 757
 758     return if ( ! defined $facet_idx || ! defined $rs );
 759     # zebra's facet element, untokenized index
 760     my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
 761     # configure zebra results for retrieving the desired facet
 762     $rs->option( elementSetName => $facet_element );
 763     # get the facet record from result set
 764     my $facet = $rs->record( 0 )->raw;
 765     # if the facet has no restuls...
 766     return if !defined $facet;
 767     # TODO: benchmark DOM vs. SAX performance
 768     my $facet_dom = XML::LibXML->load_xml(
 769       string => ($facet)
 770     );
 771     my @terms = $facet_dom->getElementsByTagName('term');
 772     return if ! @terms;
 773
 774     my $facets = {};
 775     foreach my $term ( @terms ) {
 776         my $facet_value = $term->textContent;
 777         $facet_value =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 778         $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
 779         $facets->{ $facet_value } += $term->getAttribute( 'occur' );
 780     }
 781
 782     return $facets;
 783 }
 784
 785 =head2 _get_facets_info
 786
 787     my $facets_info = C4::Search::_get_facets_info( $facets )
 788
 789 Internal function that extracts facets information and properly builds
 790 the data structure needed to render facet labels.
 791
 792 =cut
 793
 794 sub _get_facets_info {
 795
 796     my $facets = shift;
 797
 798     my $facets_info = {};
 799
 800     for my $facet ( @$facets ) {
 801         $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label };
 802     }
 803
 804     return $facets_info;
 805 }
 806
 807 sub pazGetRecords {
 808     my (
 809         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 810         $results_per_page, $offset,       $branches,       $query_type,
 811         $scan
 812     ) = @_;
 813     #NOTE: Parameter $branches is not used here !
 814
 815     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 816     $paz->init();
 817     $paz->search($simple_query);
 818     sleep 1;   # FIXME: WHY?
 819
 820     # do results
 821     my $results_hashref = {};
 822     my $stats = XMLin($paz->stat);
 823     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 824
 825     # for a grouped search result, the number of hits
 826     # is the number of groups returned; 'bib_hits' will have
 827     # the total number of bibs.
 828     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 829     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 830
 831     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 832         my $recid = $hit->{recid}->[0];
 833
 834         my $work_title = $hit->{'md-work-title'}->[0];
 835         my $work_author;
 836         if (exists $hit->{'md-work-author'}) {
 837             $work_author = $hit->{'md-work-author'}->[0];
 838         }
 839         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 840
 841         my $result_group = {};
 842         $result_group->{'group_label'} = $group_label;
 843         $result_group->{'group_merge_key'} = $recid;
 844
 845         my $count = 1;
 846         if (exists $hit->{count}) {
 847             $count = $hit->{count}->[0];
 848         }
 849         $result_group->{'group_count'} = $count;
 850
 851         for (my $i = 0; $i < $count; $i++) {
 852             # FIXME -- may need to worry about diacritics here
 853             my $rec = $paz->record($recid, $i);
 854             push @{ $result_group->{'RECORDS'} }, $rec;
 855         }
 856
 857         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 858     }
 859
 860     # pass through facets
 861     my $termlist_xml = $paz->termlist('author,subject');
 862     my $terms = XMLin($termlist_xml, forcearray => 1);
 863     my @facets_loop = ();
 864     #die Dumper($results);
 865 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 866 #        my @facets = ();
 867 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 868 #            push @facets, {
 869 #                facet_label_value => $facet->{'name'}->[0],
 870 #            };
 871 #        }
 872 #        push @facets_loop, ( {
 873 #            type_label => $list,
 874 #            facets => \@facets,
 875 #        } );
 876 #    }
 877
 878     return ( undef, $results_hashref, \@facets_loop );
 879 }
 880
 881 # TRUNCATION
 882 sub _detect_truncation {
 883     my ( $operand, $index ) = @_;
 884     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 885         @regexpr );
 886     $operand =~ s/^ //g;
 887     my @wordlist = split( /\s/, $operand );
 888     foreach my $word (@wordlist) {
 889         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 890             push @rightlefttruncated, $word;
 891         }
 892         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 893             push @lefttruncated, $word;
 894         }
 895         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 896             push @righttruncated, $word;
 897         }
 898         elsif ( index( $word, "*" ) < 0 ) {
 899             push @nontruncated, $word;
 900         }
 901         else {
 902             push @regexpr, $word;
 903         }
 904     }
 905     return (
 906         \@nontruncated,       \@righttruncated, \@lefttruncated,
 907         \@rightlefttruncated, \@regexpr
 908     );
 909 }
 910
 911 # STEMMING
 912 sub _build_stemmed_operand {
 913     my ($operand,$lang) = @_;
 914     require Lingua::Stem::Snowball ;
 915     my $stemmed_operand=q{};
 916
 917     # Stemmer needs language
 918     return $operand unless $lang;
 919
 920     # If operand contains a digit, it is almost certainly an identifier, and should
 921     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 922     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 923     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 924     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 925     return $operand if $operand =~ /\d/;
 926
 927 # FIXME: the locale should be set based on the user's language and/or search choice
 928     #warn "$lang";
 929     # Make sure we only use the first two letters from the language code
 930     $lang = lc(substr($lang, 0, 2));
 931     # The language codes for the two variants of Norwegian will now be "nb" and "nn",
 932     # none of which Lingua::Stem::Snowball can use, so we need to "translate" them
 933     if ($lang eq 'nb' || $lang eq 'nn') {
 934       $lang = 'no';
 935     }
 936     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 937                                                encoding => "UTF-8" );
 938
 939     my @words = split( / /, $operand );
 940     my @stems = $stemmer->stem(\@words);
 941     for my $stem (@stems) {
 942         $stemmed_operand .= "$stem";
 943         $stemmed_operand .= "?"
 944           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 945         $stemmed_operand .= " ";
 946     }
 947     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 948     return $stemmed_operand;
 949 }
 950
 951 # FIELD WEIGHTING
 952 sub _build_weighted_query {
 953
 954 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 955 # pretty well but could work much better if we had a smarter query parser
 956     my ( $operand, $stemmed_operand, $index ) = @_;
 957     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 958     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 959     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 960     $operand =~ s/"/ /g;    # Bug 7518: searches with quotation marks don't work
 961
 962     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 963
 964     # Keyword, or, no index specified
 965     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 966         $weighted_query .=
 967           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 968         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 969         $weighted_query .= " or Title-cover,phr,r3=\"$operand\"";    # phrase title
 970         $weighted_query .= " or ti,wrdl,r4=\"$operand\"";    # words in title
 971           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 972           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 973         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 974           if $fuzzy_enabled;    # add fuzzy, word list
 975         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 976           if ( $stemming and $stemmed_operand )
 977           ;                     # add stemming, right truncation
 978         $weighted_query .= " or wrdl,r9=\"$operand\"";
 979
 980         # embedded sorting: 0 a-z; 1 z-a
 981         # $weighted_query .= ") or (sort1,aut=1";
 982     }
 983
 984     # Barcode searches should skip this process
 985     elsif ( $index eq 'bc' ) {
 986         $weighted_query .= "bc=\"$operand\"";
 987     }
 988
 989     # Authority-number searches should skip this process
 990     elsif ( $index eq 'an' ) {
 991         $weighted_query .= "an=\"$operand\"";
 992     }
 993
 994     # If the index is numeric, don't autoquote it.
 995     elsif ( $index =~ /,st-numeric$/ ) {
 996         $weighted_query .= " $index=$operand";
 997     }
 998
 999     # If the index already has more than one qualifier, wrap the operand
1000     # in quotes and pass it back (assumption is that the user knows what they
1001     # are doing and won't appreciate us mucking up their query
1002     elsif ( $index =~ ',' ) {
1003         $weighted_query .= " $index=\"$operand\"";
1004     }
1005
1006     #TODO: build better cases based on specific search indexes
1007     else {
1008         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
1009           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
1010         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
1011         $weighted_query .= " or $index,wrdl,r6=\"$operand\"";    # word list index
1012         $weighted_query .= " or $index,wrdl,fuzzy,r8=\"$operand\""
1013           if $fuzzy_enabled;    # add fuzzy, word list
1014         $weighted_query .= " or $index,wrdl,rt,r9=\"$stemmed_operand\""
1015           if ( $stemming and $stemmed_operand );    # add stemming, right truncation
1016     }
1017
1018     $weighted_query .= "))";                       # close rank specification
1019     return $weighted_query;
1020 }
1021
=head2 getIndexes

    my $indexes = C4::Search::getIndexes();

Returns a reference to an array holding the names of the available search
indexes: the bibliographic indexes first, followed by the item indexes.
A name that exists in both groups (C<bc>, C<Local-classification>) appears
once per group.

C<buildQuery> compares these names case-insensitively against the qualifiers
found in a query, so no name may carry surrounding whitespace.

=cut

sub getIndexes{

    # biblio indexes
    my @biblio_indexes = qw(
      ab Abstract acqdate allrecords an Any at arl arp au aub aud audience auo aut
      Author Author-in-order Author-personal-bibliography Authority-Number authtype
      bc Bib-level biblionumber bio biography
      callnum cfn Chronological-subdivision cn-bib-source cn-bib-sort
      cn-class cn-item cn-prefix cn-suffix cpn
      Code-institution Conference-name Conference-name-heading
      Conference-name-see Conference-name-seealso
      Content-type Control-number copydate
      Corporate-name Corporate-name-heading Corporate-name-see Corporate-name-seealso
      Country-publication ctype curriculum
      date-entered-on-file Date-of-acquisition Date-of-publication Date-time-last-modified
      Dewey-classification Dissertation-information diss dtlm
      EAN extent fic fiction Form-subdivision format Geographic-subdivision
      he Heading Heading-use-main-or-added-entry
      Heading-use-series-added-entry Heading-use-subject-added-entry
      Host-item id-other ident Identifier-standard Illustration-code
      Index-term-genre Index-term-uncontrolled Interest-age-level Interest-grade-level
      ISBN isbn ISSN issn itemtype kw Koha-Auth-Number
      l-format language language-original lc-card LC-card-number
      lcn lex lexile-number llength
      ln ln-audio ln-subtitle Local-classification Local-number
      Match-heading Match-heading-see-from Material-type
      mc-itemtype mc-rtype mus name Music-number
      Name-geographic Name-geographic-heading Name-geographic-see Name-geographic-seealso
      nb Note notes ns nt Other-control-number
      pb Personal-name Personal-name-heading Personal-name-see Personal-name-seealso
      pl Place-publication pn popularity pubdate Publisher Provider pv
      Reading-grade-level Record-control-number rcn Record-type rtype
      se See See-also sn Stock-number
      su Subject Subject-heading-thesaurus Subject-name-personal
      Subject-subdivision Summary Suppress
      su-geo su-na su-to su-ut ut
      Term-genre-form Term-genre-form-heading Term-genre-form-see Term-genre-form-seealso
      ti Title Title-cover Title-series
      Title-uniform Title-uniform-heading Title-uniform-see Title-uniform-seealso
      totalissues yr
    );

    # items indexes
    my @item_indexes = qw(
      acqsource barcode bc branch ccode classification-source cn-sort
      coded-location-qualifier copynumber damaged datelastborrowed datelastseen
      holdingbranch homebranch issues item itemnumber itype
      Local-classification location lost materials-specified
      mc-ccode mc-itype mc-loc notforloan Number-local-acquisition onloan
      price renewals replacementprice replacementpricedate reserves restricted
      stack stocknumber inv uri withdrawn
    );

    my @indexes = ( @biblio_indexes, @item_indexes );

    return \@indexes;
}
1245
1246 =head2 buildQuery
1247
1248 ( $error, $query,
1249 $simple_query, $query_cgi,
1250 $query_desc, $limit,
1251 $limit_cgi, $limit_desc,
1252 $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
1253
1254 Build queries and limits in CCL, CGI, Human,
1255 handle truncation, stemming, field weighting, fuzziness, etc.
1256
1257 See verbose embedded documentation.
1258
1259
1260 =cut
1261
1262 sub buildQuery {
1263     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
1264     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
1265
1266     my $query_desc;
1267
1268     # dereference
1269     my @operators = $operators ? @$operators : ();
1270     my @indexes   = $indexes   ? @$indexes   : ();
1271     my @operands  = $operands  ? @$operands  : ();
1272     my @limits    = $limits    ? @$limits    : ();
1273     my @sort_by   = $sort_by   ? @$sort_by   : ();
1274
1275     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
1276     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
1277     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
1278     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
1279
1280     my $query        = $operands[0];
1281     my $simple_query = $operands[0];
1282
1283     # initialize the variables we're passing back
1284     my $query_cgi;
1285     my $query_type;
1286
1287     my $limit;
1288     my $limit_cgi;
1289     my $limit_desc;
1290
1291     my $cclq       = 0;
1292     my $cclindexes = getIndexes();
1293     if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
1294         while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) {
1295             my $dx = lc($1);
1296             $cclq = grep { lc($_) eq $dx } @$cclindexes;
1297         }
1298         $query = "ccl=$query" if $cclq;
1299     }
1300
1301 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1302 # DIAGNOSTIC ONLY!!
1303     if ( $query =~ /^ccl=/ ) {
1304         my $q=$';
1305         # This is needed otherwise ccl= and &limit won't work together, and
1306         # this happens when selecting a subject on the opac-detail page
1307         @limits = grep {!/^$/} @limits;
1308         my $original_q = $q; # without available part
1309         unless ( grep { $_ eq 'available' } @limits ) {
1310             $q =~ s| and \( \(allrecords,AlwaysMatches=''\) and \(not-onloan-count,st-numeric >= 1\) and \(lost,st-numeric=0\) \)||;
1311             $original_q = $q;
1312         }
1313         if ( @limits ) {
1314             if ( grep { $_ eq 'available' } @limits ) {
1315                 $q .= q| and ( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )|;
1316                 @limits = grep {!/^available$/} @limits;
1317             }
1318             $q .= ' and '.join(' and ', @limits) if @limits;
1319         }
1320         return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $original_q, '', '', '', 'ccl' );
1321     }
1322     if ( $query =~ /^cql=/ ) {
1323         return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' );
1324     }
1325     if ( $query =~ /^pqf=/ ) {
1326         $query_desc = $';
1327         $query_cgi = "q=pqf=".uri_escape_utf8($');
1328         return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' );
1329     }
1330
1331     # pass nested queries directly
1332     # FIXME: need better handling of some of these variables in this case
1333     # Nested queries aren't handled well and this implementation is flawed and causes users to be
1334     # unable to search for anything containing () commenting out, will be rewritten for 3.4.0
1335 #    if ( $query =~ /(\(|\))/ ) {
1336 #        return (
1337 #            undef,              $query, $simple_query, $query_cgi,
1338 #            $query,             $limit, $limit_cgi,    $limit_desc,
1339 #            'ccl'
1340 #        );
1341 #    }
1342
1343 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1344 # query operands and indexes and add stemming, truncation, field weighting, etc.
1345 # Once we do so, we'll end up with a value in $query, just like if we had an
1346 # incoming $query from the user
1347     else {
1348         $query = ""
1349           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1350         my $previous_operand
1351           ;    # a flag used to keep track if there was a previous query
1352                # if there was, we can apply the current operator
1353                # for every operand
1354         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1355
1356             # COMBINE OPERANDS, INDEXES AND OPERATORS
1357             if ( ($operands[$i] // '') ne '' ) {
1358                 $operands[$i]=~s/^\s+//;
1359
1360               # A flag to determine whether or not to add the index to the query
1361                 my $indexes_set;
1362
1363 # If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
1364                 if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
1365                     $weight_fields    = 0;
1366                     $stemming         = 0;
1367                 } else {
1368                     $operands[$i] =~ s/\?/{?}/g; # need to escape question marks
1369                 }
1370                 my $operand = $operands[$i];
1371                 my $index   = $indexes[$i] || 'kw';
1372
1373                 # Add index-specific attributes
1374
1375                 #Afaik, this 'yr' condition will only ever be met in the staff interface advanced search
1376                 #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
1377                 #which is processed higher up in this sub. Other than that, year searches are typically
1378                 #handled as limits which are not processed her either.
1379
1380                 # Search ranges: Date of Publication, st-numeric
1381                 if ( $index =~ /(yr|st-numeric)/ ) {
1382                     #weight_fields/relevance search causes errors with date ranges
1383                     #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
1384                     #In the case of YYYY-YYYY, it will return no results
1385                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1386                 }
1387
1388                 # Date of Acquisition
1389                 elsif ( $index =~ /acqdate/ ) {
1390                     #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
1391                     #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
1392                       #top of the results just because they have lots of item records matching that date.
1393                     #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
1394                       #irrelevant here
1395                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1396                 }
1397                 # ISBN,ISSN,Standard Number, don't need special treatment
1398                 elsif ( $index eq 'nb' || $index eq 'ns' || $index eq 'hi' ) {
1399                     (
1400                         $stemming,      $auto_truncation,
1401                         $weight_fields, $fuzzy_enabled
1402                     ) = ( 0, 0, 0, 0 );
1403
1404                     if ( $index eq 'nb' ) {
1405                         if ( C4::Context->preference("SearchWithISBNVariations") ) {
1406                             my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
1407                             $operands[$i] = $operand =  '(nb=' . join(' OR nb=', @isbns) . ')';
1408                             $indexes[$i] = $index = 'kw';
1409                         }
1410                     }
1411                 }
1412
1413                 # Set default structure attribute (word list)
1414                 my $struct_attr = q{};
1415                 unless ( $indexes_set || $index =~ /,(st-|phr|ext|wrdl)/ || $index =~ /^(nb|ns)$/ ) {
1416                     $struct_attr = ",wrdl";
1417                 }
1418
1419                 # Some helpful index variants
1420                 my $index_plus       = $index . $struct_attr . ':';
1421                 my $index_plus_comma = $index . $struct_attr . ',';
1422
1423                 if ($auto_truncation){
1424                         unless ( $index =~ /,(st-|phr|ext)/ ) {
1425                                                 #FIXME only valid with LTR scripts
1426                                                 $operand=join(" ",map{
1427                                                                                         (index($_,"*")>0?"$_":"$_*")
1428                                                                                          }split (/\s+/,$operand));
1429                                                 warn $operand if $DEBUG;
1430                                         }
1431                                 }
1432
1433                 # Detect Truncation
1434                 my $truncated_operand;
1435                 my( $nontruncated, $righttruncated, $lefttruncated,
1436                     $rightlefttruncated, $regexpr
1437                 ) = _detect_truncation( $operand, $index );
1438                 warn
1439 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1440                   if $DEBUG;
1441
1442                 # Apply Truncation
1443                 if (
1444                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1445                     scalar(@$rightlefttruncated) > 0 )
1446                 {
1447
1448                # Don't field weight or add the index to the query, we do it here
1449                     $indexes_set = 1;
1450                     undef $weight_fields;
1451                     my $previous_truncation_operand;
1452                     if (scalar @$nontruncated) {
1453                         $truncated_operand .= "$index_plus @$nontruncated ";
1454                         $previous_truncation_operand = 1;
1455                     }
1456                     if (scalar @$righttruncated) {
1457                         $truncated_operand .= "and " if $previous_truncation_operand;
1458                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1459                         $previous_truncation_operand = 1;
1460                     }
1461                     if (scalar @$lefttruncated) {
1462                         $truncated_operand .= "and " if $previous_truncation_operand;
1463                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1464                         $previous_truncation_operand = 1;
1465                     }
1466                     if (scalar @$rightlefttruncated) {
1467                         $truncated_operand .= "and " if $previous_truncation_operand;
1468                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1469                         $previous_truncation_operand = 1;
1470                     }
1471                 }
1472                 $operand = $truncated_operand if $truncated_operand;
1473                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1474
1475                 # Handle Stemming
1476                 my $stemmed_operand;
1477                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1478                                                                                 if $stemming;
1479
1480                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1481
1482                 # Handle Field Weighting
1483                 my $weighted_operand;
1484                 if ($weight_fields) {
1485                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1486                     $operand = $weighted_operand;
1487                     $indexes_set = 1;
1488                 }
1489
1490                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1491
1492                 #Use relevance ranking when not using a weighted query (which adds relevance ranking of its own)
1493
1494                 #N.B. Truncation is mutually exclusive with Weighted Queries,
1495                 #so even if QueryWeightFields is turned on, QueryAutoTruncate will turn it off, thus
1496                 #the need for this relevance wrapper.
1497                 $operand = "(rk=($operand))" unless $weight_fields;
1498
1499                 ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({
1500                     query => $query,
1501                     query_cgi => $query_cgi,
1502                     query_desc => $query_desc,
1503                     operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '',
1504                     parsed_operand => $operand,
1505                     original_operand => $operands[$i] // '',
1506                     index => $index,
1507                     index_plus => $index_plus,
1508                     indexes_set => $indexes_set,
1509                     previous_operand => $previous_operand,
1510                 });
1511
1512             }    #/if $operands
1513         }    # /for
1514     }
1515     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1516
1517     # add limits
1518     my %group_OR_limits;
1519     my $availability_limit;
1520     foreach my $this_limit (@limits) {
1521         next unless $this_limit;
1522         if ( $this_limit =~ /available/ ) {
1523 #
1524 ## 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1525 ## In English:
1526 ## all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1527             $availability_limit .=
1528 "( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )";
1529             $limit_cgi  .= "&limit=available";
1530             $limit_desc .= "";
1531         }
1532
1533         # group_OR_limits, prefixed by mc-
1534         # OR every member of the group
1535         elsif ( $this_limit =~ /mc/ ) {
1536             my ($k,$v) = split(/:/, $this_limit,2);
1537             if ( $k !~ /mc-i(tem)?type/ ) {
1538                 # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
1539                 $this_limit =~ tr/"//d;
1540                 $this_limit = $k.':"'.$v.'"';
1541             }
1542
1543             $group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
1544             $limit_desc      .= " or " if $group_OR_limits{$k};
1545             $group_OR_limits{$k} .= "$this_limit";
1546             $limit_cgi       .= "&limit=" . uri_escape_utf8($this_limit);
1547             $limit_desc      .= " $this_limit";
1548         }
1549
1550         # Regular old limits
1551         else {
1552             $limit .= " and " if $limit || $query;
1553             $limit      .= "$this_limit";
1554             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1555             if ($this_limit =~ /^branch:(.+)/) {
1556                 my $branchcode = $1;
1557                 my $library = Koha::Libraries->find( $branchcode );
1558                 if (defined $library) {
1559                     $limit_desc .= " branch:" . $library->branchname;
1560                 } else {
1561                     $limit_desc .= " $this_limit";
1562                 }
1563             } else {
1564                 $limit_desc .= " $this_limit";
1565             }
1566         }
1567     }
1568     foreach my $k (keys (%group_OR_limits)) {
1569         $limit .= " and " if ( $query || $limit );
1570         $limit .= "($group_OR_limits{$k})";
1571     }
1572     if ($availability_limit) {
1573         $limit .= " and " if ( $query || $limit );
1574         $limit .= "($availability_limit)";
1575     }
1576
1577     # Normalize the query and limit strings
1578     # This is flawed , means we can't search anything with : in it
1579     # if user wants to do ccl or cql, start the query with that
1580 #    $query =~ s/:/=/g;
1581     #NOTE: We use several different regexps here as you can't have variable length lookbehind assertions
1582     $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
1583     $query =~ s/(?<=(wrdl)):/=/g;
1584     $query =~ s/(?<=(trn|phr)):/=/g;
1585     $query =~ s/(?<=(st-numeric)):/=/g;
1586     $query =~ s/(?<=(st-year)):/=/g;
1587     $query =~ s/(?<=(st-date-normalized)):/=/g;
1588
1589     # Removing warnings for later substitutions
1590     $query      //= q{};
1591     $query_desc //= q{};
1592     $query_cgi  //= q{};
1593     $limit      //= q{};
1594     $limit_desc //= q{};
1595     $limit =~ s/:/=/g;
1596     for ( $query, $query_desc, $limit, $limit_desc ) {
1597         s/  +/ /g;    # remove extra spaces
1598         s/^ //g;     # remove any beginning spaces
1599         s/ $//g;     # remove any ending spaces
1600         s/==/=/g;    # remove double == from query
1601     }
1602     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1603
1604     for ($query_cgi,$simple_query) {
1605         s/"//g;
1606     }
1607     # append the limit to the query
1608     $query .= " " . $limit;
1609
1610     # Warnings if DEBUG
1611     if ($DEBUG) {
1612         warn "QUERY:" . $query;
1613         warn "QUERY CGI:" . $query_cgi;
1614         warn "QUERY DESC:" . $query_desc;
1615         warn "LIMIT:" . $limit;
1616         warn "LIMIT CGI:" . $limit_cgi;
1617         warn "LIMIT DESC:" . $limit_desc;
1618         warn "---------\nLeave buildQuery\n---------";
1619     }
1620
1621     return (
1622         undef,              $query, $simple_query, $query_cgi,
1623         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1624         $query_type
1625     );
1626 }
1627
=head2 _build_initial_query

  ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params);

  Build a section of the initial query containing indexes, operators, and operands.

  $initial_query_params is a hashref with the following keys:

    query, query_cgi, query_desc - the strings built so far; this section is
                                   appended to each of them
    operator          - boolean operator joining this section to the previous
                        one; ' and ' is used when it is empty
    parsed_operand    - the operand as it must appear in the query
    original_operand  - the operand as typed by the user (used for the CGI
                        string and the description)
    index             - the index name, added to the CGI string as idx=
    index_plus        - the index prefix put in front of the operand
    indexes_set       - true when parsed_operand already carries its indexes,
                        in which case index_plus is not prepended
    previous_operand  - true when an earlier operand is already in the query

  Undefined index_plus, parsed_operand and original_operand are treated as
  empty strings. The hashref is updated in place and the updated query,
  query_cgi, query_desc and previous_operand (set to 1 when it was false) are
  returned.

=cut

sub _build_initial_query {
    my ($params) = @_;

    # An operator is only needed when something precedes this operand;
    # fall back to 'and' when the caller did not supply one.
    my $operator = q{};
    if ( $params->{previous_operand} ) {
        $operator = $params->{operator} ? " " . $params->{operator} . " " : ' and ';
    }

    # Normalise the optional pieces once so that none of the concatenations
    # below trips over an undefined value.
    my $index_plus       = $params->{index_plus}       // q{};
    my $parsed_operand   = $params->{parsed_operand}   // q{};
    my $original_operand = $params->{original_operand} // q{};

    #NOTE: indexes_set is typically set when doing truncation or field weighting
    my $operand = $params->{indexes_set} ? $parsed_operand : $index_plus . $parsed_operand;

    #e.g. "kw,wrdl:test"
    #e.g. " and kw,wrdl:test"
    $params->{query} .= $operator . $operand;

    $params->{query_cgi} .= "&op=" . uri_escape_utf8($operator) if $operator;
    $params->{query_cgi} .= "&idx=" . uri_escape_utf8( $params->{index} ) if $params->{index};

    # Test the length rather than the truthiness so that a search for "0"
    # is carried in the CGI string just as it is in the query and description.
    $params->{query_cgi} .= "&q=" . uri_escape_utf8($original_operand) if length $original_operand;

    #e.g. " and kw,wrdl: test"
    $params->{query_desc} .= $operator . $index_plus . " " . $original_operand;

    # If there was no previous operand, this one becomes it for the next call
    $params->{previous_operand} ||= 1;

    return ( $params->{query}, $params->{query_cgi}, $params->{query_desc}, $params->{previous_operand} );
}
1663
=head2 searchResults

  my @search_results = searchResults($search_context, $searchdesc, $hits,
                                     $results_per_page, $offset, $scan,
                                     $marcresults, $xslt_variables);

Format results in a form suitable for passing to the template.

=over 4

=item C<$search_context>

Hashref. C<interface> selects the display rules; any value other than
'intranet' is replaced by 'opac' (the hashref is updated in place).
C<category> is handed to C4::Items::GetHiddenItemnumbers as the borrower
category when OPAC hidden items are evaluated.

=item C<$searchdesc>

Accepted for backward compatibility; not used by this function.

=item C<$hits>, C<$results_per_page>, C<$offset>

Records C<$offset> to C<$offset + $results_per_page - 1> are formatted,
capped at C<$hits>.

=item C<$scan>

True when C<$marcresults> holds USMARC built for a scan search; otherwise the
records are decoded with new_record_from_zebra.

=item C<$marcresults>

Arrayref of raw records, indexed by result position.

=item C<$xslt_variables>

Passed through unchanged to XSLTParse4Display.

=back

Returns a list of hashrefs (one per displayed record) holding the Koha fields
of the record plus item counts, item display loops and, when an XSLT is
configured, the rendered C<XSLTResultsRecord>. Records that cannot be decoded,
and records whose items are all hidden in the OPAC, are left out.

=cut

# IMO this subroutine is pretty messy still -- it's responsible for
# building the HTML output for the template
sub searchResults {
    my ( $search_context, $searchdesc, $hits, $results_per_page, $offset, $scan, $marcresults, $xslt_variables ) = @_;
    my @newresults;

    require C4::Items;

    $search_context->{'interface'} = 'opac' if !$search_context->{'interface'} || $search_context->{'interface'} ne 'intranet';
    my $interface = $search_context->{'interface'};
    my ( $is_opac, $hidelostitems );
    if ( $interface eq 'opac' ) {
        $hidelostitems = C4::Context->preference('hidelostitems');
        $is_opac       = 1;
    }

    my $record_processor = Koha::RecordProcessor->new({
        filters => 'ViewPolicy'
    });

    #Build branchnames hash
    my %branches = map { $_->branchcode => $_->branchname } Koha::Libraries->search({}, { order_by => 'branchname' });

# FIXME - We build an authorised values hash here, using the default framework
# though it is possible to have different authvals for different fws.

    my $shelflocations =
      { map { $_->{authorised_value} => $_->{lib} } Koha::AuthorisedValues->get_descriptions_by_koha_field( { frameworkcode => '', kohafield => 'items.location' } ) };

    # get notforloan authorised value list (see $shelflocations  FIXME)
    my $av = Koha::MarcSubfieldStructures->search({ frameworkcode => '', kohafield => 'items.notforloan', authorised_value => [ -and => {'!=' => undef }, {'!=' => ''}] });
    my $notforloan_authorised_value = $av->count ? $av->next->authorised_value : undef;

    #Get itemtype hash
    my $itemtypes_rs = Koha::ItemTypes->search_with_localization;
    my %itemtypes    = map { $_->{itemtype} => $_ } @{ $itemtypes_rs->unblessed };

    #search item field code
    my ($itemtag) = GetMarcFromKohaField( "items.itemnumber" );

    ## find column names of items related to MARC
    my %subfieldstosearch;
    my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns;
    for my $column ( @columns ) {
        my ( $tagfield, $tagsubfield ) = GetMarcFromKohaField( "items." . $column );
        if ( defined $tagsubfield ) {
            $subfieldstosearch{$column} = $tagsubfield;
        }
    }

    # handle which records to actually retrieve
    $offset //= 0;
    my $times;
    if ( $hits && $offset + $results_per_page <= $hits ) {
        $times = $offset + $results_per_page;
    }
    else {
        # Without a hit count there is nothing to format
        $times = $hits // 0;
    }

    my $marcflavour = C4::Context->preference("marcflavour");
    # We get the biblionumber position in MARC
    my ($bibliotag,$bibliosubf)=GetMarcFromKohaField( 'biblio.biblionumber' );

    # set stuff for XSLT processing here once, not later again for every record we retrieved
    my $xslfile;
    my $xslsyspref;
    if( $is_opac ){
        $xslsyspref = "OPACXSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref );
    } else {
        $xslsyspref = "XSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref ) || "default";
    }
    my $lang   = $xslfile ? C4::Languages::getlanguage()  : undef;
    my $sysxml = $xslfile ? C4::XSLT::get_xslt_sysprefs() : undef;

    my $userenv = C4::Context->userenv;
    my $logged_in_user
        = ( defined $userenv and $userenv->{number} )
        ? Koha::Patrons->find( $userenv->{number} )
        : undef;
    my $patron_category_hide_lost_items = ($logged_in_user) ? $logged_in_user->category->hidelostitems : 0;

    # System preferences that stay constant for the whole result set are read
    # once here instead of once per record or per item.
    my $item_level_itypes        = C4::Context->preference("item-level_itypes");
    my $easy_analytical_records  = C4::Context->preference('EasyAnalyticalRecords');
    my $analyticsfield           = $marcflavour eq 'UNIMARC' ? '461' : '773';
    my $status_check_limit       = C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck');
    my $allow_holds_on_damaged   = C4::Context->preference('AllowHoldsOnDamagedItems');
    my $alternate_holdings_field = C4::Context->preference("AlternateHoldingsField");
    my $maxitems_pref            = C4::Context->preference('maxItemsinSearchResults');
    my $maxitems                 = $maxitems_pref ? $maxitems_pref - 1 : 1;
    my $hbranch                  = C4::Context->preference('StaffSearchResultsDisplayBranch');
    my $otherbranch              = ( $hbranch // q{} ) eq 'homebranch' ? 'holdingbranch' : 'homebranch';

    # loop through all of the records we've retrieved
    RECORD:
    for my $i ( $offset .. $times - 1 ) {

        my $marcrecord;
        if ($scan) {
            # For Scan searches we built USMARC data
            $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i] );
        } else {
            # Normal search, render from Zebra's output
            $marcrecord = new_record_from_zebra(
                'biblioserver',
                $marcresults->[$i]
            );

            if ( ! defined $marcrecord ) {
                warn "ERROR DECODING RECORD - $@: " . ( $marcresults->[$i] // q{} );
                next RECORD;
            }
        }

        # Framework of the record, looked up through its biblionumber.
        # A record without the biblionumber control field no longer aborts
        # the whole result page; its framework is simply looked up as undef.
        my $fw;
        if ( !$scan ) {
            my $record_biblionumber;
            if ( $bibliotag < 10 ) {
                my $control_field = $marcrecord->field($bibliotag);
                $record_biblionumber = $control_field ? $control_field->data : undef;
            } else {
                $record_biblionumber = $marcrecord->subfield( $bibliotag, $bibliosubf );
            }
            $fw = GetFrameworkCode($record_biblionumber);
        }

        SetUTF8Flag($marcrecord);
        my $oldbiblio = TransformMarcToKoha( $marcrecord, $fw );
        $oldbiblio->{result_number} = $i + 1;

        $oldbiblio->{normalized_upc}  = GetNormalizedUPC(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_ean}  = GetNormalizedEAN(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
        $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
        $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});

        # edition information, if any
        $oldbiblio->{edition} = $oldbiblio->{editionstatement};

        my $itemtype = $oldbiblio->{itemtype} ? $itemtypes{$oldbiblio->{itemtype}} : undef;
        # add imageurl to itemtype if there is one
        $oldbiblio->{imageurl} = $itemtype ? getitemtypeimagelocation( $interface, $itemtype->{imageurl} ) : q{};
        # Build summary if there is one (the summary is defined in the itemtypes table)
        $oldbiblio->{description} = $itemtype ? $itemtype->{translated_description} : q{};

        # FIXME: this is only used in the deprecated non-XSLT opac results
        if ( !$xslfile && $is_opac && $itemtype && $itemtype->{summary} ) {
            my $summary    = $itemtype->{summary};
            my $newsummary = q{};

            foreach my $line ( "$summary\n" =~ /(.*)\n/g ){

                # We catch how many times to repeat this line: as often as the
                # most repeated subfield among ALL the [TTTs] placeholders on it
                my $max = 0;
                foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/g ) {
                    my ( $field_tag, $subfield_code ) = $tag =~ /(.{3})(.)/;
                    my $summary_field = $marcrecord->field($field_tag) or next;
                    my @values = $summary_field->subfield($subfield_code);
                    $max = scalar @values if @values > $max;
                }

                # we replace, and repeat each line
                for my $repeat ( 0 .. $max - 1 ) {
                    my $newline = $line;

                    foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
                        my ( $field_tag, $subfield_code ) = $tag =~ /(.{3})(.)/;
                        my $summary_field = $marcrecord->field($field_tag) or next;
                        my @repl = $summary_field->subfield($subfield_code);

                        # Subfields repeated fewer times than the line is
                        # simply rendered empty on the extra repetitions
                        my $subfieldvalue = $repl[$repeat] // q{};

                        # \Q..\E: the placeholder may contain '|', which must
                        # not be read as a regex alternation
                        $newline =~ s/\[\Q$tag\E\]/$subfieldvalue/g;
                    }
                    $newsummary .= "$newline\n";
                }
            }

            $newsummary =~ s/\[(.*?)]//g;
            $newsummary =~ s/\n/<br\/>/g;
            $oldbiblio->{summary} = $newsummary;
        }

        # Pull out the items fields
        my @fields = $marcrecord->field($itemtag);

        # adding linked items that belong to host records
        if ($easy_analytical_records) {
            foreach my $hostfield ( $marcrecord->field($analyticsfield)) {
                my $hostbiblionumber = $hostfield->subfield("0");
                my $linkeditemnumber = $hostfield->subfield("9");
                if( $hostbiblionumber ) {
                    my $linkeditemmarc = C4::Items::GetMarcItem( $hostbiblionumber, $linkeditemnumber );
                    if ($linkeditemmarc) {
                        my $linkeditemfield = $linkeditemmarc->field($itemtag);
                        if ($linkeditemfield) {
                            push( @fields, $linkeditemfield );
                        }
                    }
                }
            }
        }

        # Setting item statuses for display
        my @available_items_loop;
        my @onloan_items_loop;
        my @other_items_loop;

        my $available_items;
        my $onloan_items;
        my $other_items;

        my $ordered_count         = 0;
        my $available_count       = 0;
        my $onloan_count          = 0;
        my $longoverdue_count     = 0;
        my $other_count           = 0;
        my $withdrawn_count       = 0;
        my $itemlost_count        = 0;
        my $hideatopac_count      = 0;
        my $itembinding_count     = 0;
        my $itemdamaged_count     = 0;
        my $item_in_transit_count = 0;
        my $can_place_holds       = 0;
        my $item_onhold_count     = 0;
        my $notforloan_count      = 0;
        my $items_count           = scalar(@fields);
        my @hiddenitems; # hidden itemnumbers based on OpacHiddenItems syspref

        # loop through every item
        ITEM:
        foreach my $field (@fields) {
            my $item;

            # populate the items hash
            foreach my $code ( keys %subfieldstosearch ) {
                $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
            }

            # Resolve the item's type with a plain hash read, so that an
            # unknown or missing itype never creates an empty entry in
            # %itemtypes (which later records would mistake for a real type).
            my $item_itemtype = defined $item->{itype} ? $itemtypes{ $item->{itype} } : undef;
            my $item_imageurl = $item_itemtype ? $item_itemtype->{imageurl} : undef;
            $item->{description} = $item_itemtype->{translated_description} if $item->{itype} && $item_itemtype;

            # OPAC hidden items
            if ($is_opac) {
                # hidden because lost
                if ($hidelostitems && $item->{itemlost}) {
                    $hideatopac_count++;
                    next ITEM;
                }
                # hidden based on OpacHiddenItems syspref
                my @hi = C4::Items::GetHiddenItemnumbers({ items=> [ $item ], borcat => $search_context->{category} });
                if (scalar @hi) {
                    push @hiddenitems, @hi;
                    $hideatopac_count++;
                    next ITEM;
                }
            }

            # set item's branch name, use the branch selected by the
            # StaffSearchResultsDisplayBranch syspref first, fall back to the other one
            if ($item->{$hbranch}) {
                $item->{'branchname'} = $branches{$item->{$hbranch}};
            }
            elsif ($item->{$otherbranch}) {     # Last resort
                $item->{'branchname'} = $branches{$item->{$otherbranch}};
            }

            my $prefix =
                ( $item->{$hbranch} ? $item->{$hbranch} . '--' : q{} )
              . ( $item->{location} ? $item->{location} : q{} )
              . ( $item->{itype}    ? $item->{itype}    : q{} )
              . ( $item->{itemcallnumber} ? $item->{itemcallnumber} : q{} );

            # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
            # NOTE(review): the $logged_in_user test means checked-out items are
            # only grouped as "on loan" for logged-in sessions; anonymous
            # sessions fall through to the branch below. Confirm this is intended.
            if ( $item->{onloan}
                and $logged_in_user
                and !( $patron_category_hide_lost_items and $item->{itemlost} ) )
            {
                $onloan_count++;
                my $key = $prefix . $item->{onloan} . ( $item->{barcode} // q{} );
                $onloan_items->{$key}->{due_date} = $item->{onloan};
                $onloan_items->{$key}->{count}++ if $item->{$hbranch};
                $onloan_items->{$key}->{branchname}     = $item->{branchname};
                $onloan_items->{$key}->{location}       = $shelflocations->{ $item->{location} } if $item->{location};
                $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
                $onloan_items->{$key}->{description}    = $item->{description};
                $onloan_items->{$key}->{imageurl}       = getitemtypeimagelocation( $interface, $item_imageurl );

                # if something's checked out and lost, mark it as 'long overdue'
                if ( $item->{itemlost} ) {
                    $onloan_items->{$key}->{longoverdue}++;
                    $longoverdue_count++;
                }
                else {    # can place holds as long as item isn't lost
                    $can_place_holds = 1;
                }
            }

            # items not on loan, but still unavailable ( lost, withdrawn, damaged )
            else {

                # An item inherits "not for loan" from its item type (item-level
                # or biblio-level, depending on the item-level_itypes syspref)
                my $itemtype_code = $item_level_itypes ? $item->{itype} : $oldbiblio->{itemtype};
                my $loan_itemtype = $itemtype_code ? $itemtypes{$itemtype_code} : undef;
                $item->{notforloan} = 1
                  if !$item->{notforloan} && $loan_itemtype && $loan_itemtype->{notforloan};

                # item is on order (negative value) or not for loan (positive value);
                # undef and '' count as 0 so the numeric tests stay warning-free
                my $notforloan_value = $item->{notforloan} || 0;
                if ( $notforloan_value < 0 ) {
                    $ordered_count++;
                } elsif ( $notforloan_value > 0 ) {
                    $notforloan_count++;
                }

                # is item in transit?
                my $transfertwhen = '';

                # is item on the reserve shelf?
                my $reservestatus = '';

                # A couple heuristics to limit how many times
                # we query the database for item transfer information, sacrificing
                # accuracy in some cases for speed;
                #
                # 1. don't query if item has one of the other statuses
                # 2. don't check transit status if the bib has more items than
                #    the MaxSearchResultsItemsPerRecordStatusCheck syspref allows
                #
                # FIXME: to avoid having the query the database like this, and to make
                #        the in transit status count as unavailable for search limiting,
                #        should map transit status to record indexed in Zebra.
                #
                my $skip_status_lookup =
                     $item->{withdrawn}
                  || $item->{itemlost}
                  || $item->{damaged}
                  || $item->{notforloan}
                  || ( $status_check_limit && $items_count > $status_check_limit );
                if ( !$skip_status_lookup ) {
                    # Only the first value returned by GetTransfers is used here
                    ($transfertwhen) = C4::Circulation::GetTransfers( $item->{itemnumber} );
                    $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} ) // q{};
                }

                # item is withdrawn, lost, damaged, not for loan, reserved or in transit
                if (   $item->{withdrawn}
                    || $item->{itemlost}
                    || $item->{damaged}
                    || $item->{notforloan}
                    || $reservestatus eq 'Waiting'
                    || ($transfertwhen && $transfertwhen ne ''))
                {
                    $withdrawn_count++       if $item->{withdrawn};
                    $itemlost_count++        if $item->{itemlost};
                    $itemdamaged_count++     if $item->{damaged};
                    $item_in_transit_count++ if $transfertwhen && $transfertwhen ne '';
                    $item_onhold_count++     if $reservestatus eq 'Waiting';
                    $item->{status} = ($item->{withdrawn}//q{}) . "-" . ($item->{itemlost}//q{}) . "-" . ($item->{damaged}//q{}) . "-" . ($item->{notforloan}//q{});

                    # can place a hold on a item if
                    # not lost nor withdrawn
                    # not damaged unless AllowHoldsOnDamagedItems is true
                    # item is either for loan or on order (notforloan < 0)
                    $can_place_holds = 1
                      if (
                           !$item->{itemlost}
                        && !$item->{withdrawn}
                        && ( !$item->{damaged} || $allow_holds_on_damaged )
                        && ( !$item->{notforloan} || $item->{notforloan} < 0 )
                      );

                    $other_count++;

                    my $key = $prefix . $item->{status};
                    foreach my $copied_key (qw(withdrawn itemlost damaged branchname itemcallnumber)) {
                        $other_items->{$key}->{$copied_key} = $item->{$copied_key};
                    }
                    # GetTransfers leaves $transfertwhen undefined when there is no transfer
                    $other_items->{$key}->{intransit} = ( defined $transfertwhen && $transfertwhen ne '' ) ? 1 : 0;
                    $other_items->{$key}->{onhold} = ($reservestatus) ? 1 : 0;
                    $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value and $item->{notforloan};
                    $other_items->{$key}->{count}++ if $item->{$hbranch};
                    $other_items->{$key}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $other_items->{$key}->{description} = $item->{description};
                    $other_items->{$key}->{imageurl} = getitemtypeimagelocation( $interface, $item_imageurl );
                }
                # item is available
                else {
                    $can_place_holds = 1;
                    $available_count++;
                    $available_items->{$prefix}->{count}++ if $item->{$hbranch};
                    foreach my $copied_key (qw(branchname itemcallnumber description)) {
                        $available_items->{$prefix}->{$copied_key} = $item->{$copied_key};
                    }
                    $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( $interface, $item_imageurl );
                }
            }
        }    # /foreach item

        # if all items are hidden, do not show the record
        if ($items_count > 0 && $hideatopac_count == $items_count) {
            next RECORD;
        }

        # Build the display loops, each limited by the maxItemsinSearchResults syspref
        my ( $availableitemscount, $onloanitemscount, $otheritemscount );
        for my $key ( sort keys %$onloan_items ) {
            (++$onloanitemscount > $maxitems) and last;
            push @onloan_items_loop, $onloan_items->{$key};
        }
        for my $key ( sort keys %$other_items ) {
            (++$otheritemscount > $maxitems) and last;
            push @other_items_loop, $other_items->{$key};
        }
        for my $key ( sort keys %$available_items ) {
            (++$availableitemscount > $maxitems) and last;
            push @available_items_loop, $available_items->{$key};
        }

        # XSLT processing of some stuff
        # we fetched the sysprefs already before the loop through all retrieved record!
        if (!$scan && $xslfile) {
            $record_processor->options({
                frameworkcode => $fw,
                interface     => $interface
            });

            $record_processor->process($marcrecord);
            $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $xslsyspref, 1, \@hiddenitems, $sysxml, $xslfile, $lang, $xslt_variables);
        }

        # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either
        if ( !$item_level_itypes ) {
            if ($itemtype && $itemtype->{notforloan}) {
                $can_place_holds = 0;
            }
        }
        $oldbiblio->{norequests} = 1 unless $can_place_holds;
        $oldbiblio->{items_count}          = $items_count;
        $oldbiblio->{available_items_loop} = \@available_items_loop;
        $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
        $oldbiblio->{other_items_loop}     = \@other_items_loop;
        $oldbiblio->{availablecount}       = $available_count;
        $oldbiblio->{availableplural}      = 1 if $available_count > 1;
        $oldbiblio->{onloancount}          = $onloan_count;
        $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
        $oldbiblio->{othercount}           = $other_count;
        $oldbiblio->{otherplural}          = 1 if $other_count > 1;
        $oldbiblio->{withdrawncount}       = $withdrawn_count;
        $oldbiblio->{itemlostcount}        = $itemlost_count;
        $oldbiblio->{damagedcount}         = $itemdamaged_count;
        $oldbiblio->{intransitcount}       = $item_in_transit_count;
        $oldbiblio->{onholdcount}          = $item_onhold_count;
        $oldbiblio->{orderedcount}         = $ordered_count;
        $oldbiblio->{notforloancount}      = $notforloan_count;

        # Records without items may describe their holdings in another MARC
        # field: the first three characters of the syspref give the tag, the
        # remaining characters the subfield codes to display.
        if ( $alternate_holdings_field && $items_count == 0 ) {
            my $fieldspec = $alternate_holdings_field;
            my $subfields = substr $fieldspec, 3;
            my $holdingsep = C4::Context->preference("AlternateHoldingsSeparator") || ' ';
            my @alternateholdingsinfo = ();
            my @holdingsfields = $marcrecord->field(substr $fieldspec, 0, 3);
            my $alternateholdingscount = 0;

            for my $field (@holdingsfields) {
                my %holding = ( holding => '' );
                my $havesubfield = 0;
                for my $subfield ($field->subfields()) {
                    if ((index $subfields, $subfield->[0]) >= 0) {
                        $holding{'holding'} .= $holdingsep if (length $holding{'holding'} > 0);
                        $holding{'holding'} .= $subfield->[1];
                        $havesubfield++;
                    }
                }
                if ($havesubfield) {
                    push(@alternateholdingsinfo, \%holding);
                    $alternateholdingscount++;
                }
            }

            $oldbiblio->{'ALTERNATEHOLDINGS'} = \@alternateholdingsinfo;
            $oldbiblio->{'alternateholdings_count'} = $alternateholdingscount;
        }

        $oldbiblio->{biblio_object} = Koha::Biblios->find( $oldbiblio->{biblionumber} );

        push( @newresults, $oldbiblio );
    }

    return @newresults;
}
2160
=head2 enabled_staff_search_views

  %hash = enabled_staff_search_views();

Returns a hash of three flags read from the system preferences, telling which
staff search results views are enabled. Call it in list context.

=over 2

=item C<Output arg:>

    * $hash{can_view_MARC} is the value of the viewMARC preference
    * $hash{can_view_ISBD} is the value of the viewISBD preference
    * $hash{can_view_labeledMARC} is the value of the viewLabeledMARC preference

=item C<usage in the script:>

    $template->param ( C4::Search::enabled_staff_search_views );

=back

=cut

sub enabled_staff_search_views {

    # Template flag => system preference that enables the matching view,
    # kept as ordered pairs so the returned list keeps a stable order.
    my @syspref_for_flag = (
        [ can_view_MARC        => 'viewMARC' ],
        [ can_view_ISBD        => 'viewISBD' ],
        [ can_view_labeledMARC => 'viewLabeledMARC' ],
    );

    return map { ( $_->[0] => C4::Context->preference( $_->[1] ) ) } @syspref_for_flag;
}
2193
=head2 z3950_search_args

    $arrayref = z3950_search_args($matchpoints);

This function returns an array reference that contains the search parameters
to be passed to the Z39.50 search script (z3950_search.pl). Each element is a
hash ref with two keys: C<name>, the name of a search parameter, and C<value>,
the value of that search parameter.

The search parameter names are lccn, isbn, issn, title, author, dewey and
subject, and the elements are returned in that order.

The search parameter values are read from the hash reference given in
$matchpoints, for example the one returned by Biblio::GetBiblioData(). The
hash passed in is never modified.

If the title is a valid ISBN (according to Business::ISBN), it is used as the
isbn search parameter instead, and no title parameter is returned.

If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor,
e.g. a general purpose search argument. In this case the returned array
contains only one entry: its name is 'isbn' when the scalar is a valid ISBN
and 'title' otherwise, and its value is the scalar itself.

If a search parameter value is undefined or empty, it is not included in the
returned array.

The returned array reference may be passed directly to the template parameters.

=over 2

=item C<Output arg:>

    * $arrayref containing hash refs as described above

=item C<usage in the script:>

    $data = Biblio::GetBiblioData($bibno);
    $template->param( MYLOOP => C4::Search::z3950_search_args($data) );

    # *OR*

    $template->param( MYLOOP => C4::Search::z3950_search_args($searchscalar) );

=back

=cut

sub z3950_search_args {
    my $bibrec = shift;

    # Work on a private shallow copy so the caller's record is left untouched;
    # a plain scalar is treated as a title until proven to be an ISBN.
    my %search = ref $bibrec ? %{$bibrec} : ( title => $bibrec );

    # Only ask Business::ISBN about a real string: there is nothing to parse
    # when no title was supplied.
    my $candidate = $search{title};
    my $isbn = defined $candidate ? Business::ISBN->new($candidate) : undef;

    if ( defined $isbn && $isbn->is_valid ) {

        # The "title" is really an ISBN: search on it as such.
        $search{isbn} = $candidate;
        delete $search{title};
    }

    my @args;
    for my $field (qw( lccn isbn issn title author dewey subject )) {
        my $value = $search{$field};

        # Skip both undefined and empty values, as documented above.
        next unless defined $value && length $value;
        push @args, { name => $field, value => $value };
    }
    return \@args;
}
2263
=head2 GetDistinctValues

    $elements = GetDistinctValues( $fieldname, $string );

Returns a reference to an array of hash references, each with the keys
C<value> and C<cnt>.

If C<$fieldname> contains a dot, it is split into a table name and a column
name, and the distinct values of that column are read from the database
together with the number of rows holding each value. When C<$string> is given,
only the values starting with it are returned.

Otherwise C<$fieldname> and C<$string> are combined into a CCL scan query that
is sent to the biblioserver and the authorityserver, and the scanned terms of
both servers are returned.

=cut

sub GetDistinctValues {
    my ( $fieldname, $string ) = @_;

    if ( $fieldname =~ /\./ ) {
        my ( $table, $column ) = split /\./, $fieldname;
        my $dbh = C4::Context->dbh;

        # Identifiers cannot be bound as placeholders, so they are quoted by
        # the driver instead of being interpolated as-is.
        my $quoted_table  = $dbh->quote_identifier($table);
        my $quoted_column = $dbh->quote_identifier($column);

        my $query =
          "SELECT DISTINCT($quoted_column) AS value, COUNT(*) AS cnt FROM $quoted_table";
        my @bind_values;
        if ($string) {

            # The prefix is bound, never pasted into the SQL text.
            $query .= " WHERE $quoted_column LIKE ?";
            push @bind_values, "$string%";
        }
        $query .= " GROUP BY value ORDER BY $quoted_column";

        warn $query if $DEBUG;
        my $sth = $dbh->prepare($query);
        $sth->execute(@bind_values);
        return $sth->fetchall_arrayref( {} );
    }

    # No table.column given: scan the Zebra indexes instead.
    $string ||= qq("");
    my ( @zconns, @results );
    for my $server (qw( biblioserver authorityserver )) {
        my $zconn = C4::Context->Zconn( $server, 1 );
        my $scan_set =
          $zconn->scan( ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconn ) );
        push @zconns,  $zconn;
        push @results, $scan_set;
    }

    # The big moment: asynchronously retrieve results from all servers
    my @elements;
    _ZOOM_event_loop(
        \@zconns,
        \@results,
        sub {
            my ( $i, $size ) = @_;
            for my $j ( 0 .. $size - 1 ) {
                my %hashscan;

                # display_term is stored as the (value, cnt) pair of the entry
                @hashscan{qw(value cnt)} = $results[ $i - 1 ]->display_term($j);
                push @elements, \%hashscan;
            }
        }
    );
    return \@elements;
}
2311
=head2 _ZOOM_event_loop

    _ZOOM_event_loop(\@zconns, \@results, sub {
        my ( $i, $size ) = @_;
        ....
    } );

Processes a ZOOM event loop and passes control to a closure for
processing the results, and destroying the resultsets.

C<\@zconns> and C<\@results> are parallel arrays: the result at a given
position belongs to the connection at the same position. The closure is
called with the 1-based index of the connection that finished and the size of
its result, and only when that size is greater than zero. Positions of
C<\@results> that hold no result are skipped, both when dispatching and when
destroying.

=cut

sub _ZOOM_event_loop {
    my ( $zconns, $results, $callback ) = @_;

    # ZOOM::event hands back the 1-based index of the connection that fired
    # an event; the loop runs until it reports 0.
    while ( ( my $i = ZOOM::event($zconns) ) != 0 ) {
        my $ev = $zconns->[ $i - 1 ]->last_event();
        next unless $ev == ZOOM::Event::ZEND();

        my $result = $results->[ $i - 1 ];
        next unless $result;

        my $size = $result->size();
        $callback->( $i, $size ) if $size > 0;
    }

    foreach my $result ( @{$results} ) {

        # A slot may be empty (the loop above already allows for that), so
        # only destroy what actually exists.
        next unless defined $result;
        $result->destroy();
    }
    return;
}
2341
=head2 new_record_from_zebra

    $marc_record = new_record_from_zebra( $server, $raw_data );

Given raw data from a searchengine result set, return a MARC::Record object.

This helper takes the SearchEngine system preference and the Zebra index mode
configuration into account to pick the right way of building the record:

=over 2

=item * With Elasticsearch, C<$raw_data> is returned as is when it already is
a MARC::Record; otherwise it is parsed as MARCXML. (In that case this function
really needs a new name.)

=item * With Zebra in DOM mode (the default when no mode is configured),
C<$raw_data> is parsed as MARCXML.

=item * With Zebra in any other mode (GRS-1), C<$raw_data> is parsed as USMARC.

=back

C<$server> selects which index mode setting is read: the bibliographic one for
'biblioserver', the authority one for anything else.

When parsing the Zebra data fails, nothing is returned.

=cut

sub new_record_from_zebra {
    my ( $server, $raw_data ) = @_;

    if ( C4::Context->preference("SearchEngine") eq 'Elasticsearch' ) {
        return $raw_data if ref $raw_data eq 'MARC::Record';
        return MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
    }

    # Bibliographic and authority indexes each have their own mode setting.
    my $mode_setting =
      $server eq 'biblioserver'
      ? 'zebra_bib_index_mode'
      : 'zebra_auth_index_mode';
    my $index_mode = C4::Context->config($mode_setting) // 'dom';

    my $parse =
      $index_mode eq 'dom'
      ? sub { MARC::Record->new_from_xml( $_[0], 'UTF-8' ) }
      : sub { MARC::Record->new_from_usmarc( $_[0] ) };

    # Unparsable data is not fatal here: the caller simply gets nothing back.
    my $marc_record = eval { $parse->($raw_data) };
    return if $@;

    return $marc_record;
}
2386
2387 END { }    # module clean-up code here (global destructor)
2388
2389 1;
2390 __END__
2391
2392 =head1 AUTHOR
2393
2394 Koha Development Team <http://koha-community.org/>
2395
2396 =cut