C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  17
  18 use Modern::Perl;
  19 use C4::Context;
  20 use C4::Biblio qw( TransformMarcToKoha GetMarcFromKohaField GetFrameworkCode GetAuthorisedValueDesc GetBiblioData );
  21 use C4::Koha qw( getFacets GetVariationsOfISBN GetNormalizedUPC GetNormalizedEAN GetNormalizedOCLCNumber GetNormalizedISBN getitemtypeimagelocation );
  22 use Koha::DateUtils;
  23 use Koha::Libraries;
  24 use Lingua::Stem;
  25 use XML::Simple;
  26 use C4::XSLT qw( XSLTParse4Display );
  27 use C4::Reserves qw( GetReserveStatus );
  28 use C4::Charset qw( SetUTF8Flag );
  29 use Koha::AuthorisedValues;
  30 use Koha::ItemTypes;
  31 use Koha::Libraries;
  32 use Koha::Logger;
  33 use Koha::Patrons;
  34 use Koha::Recalls;
  35 use Koha::RecordProcessor;
  36 use URI::Escape;
  37 use Business::ISBN;
  38 use MARC::Record;
  39 use MARC::Field;
  40
  41 our (@ISA, @EXPORT_OK);
  42 BEGIN {
  43     require Exporter;
  44     @ISA    = qw(Exporter);
  45     @EXPORT_OK = qw(
  46       FindDuplicate
  47       SimpleSearch
  48       searchResults
  49       getRecords
  50       buildQuery
  51       GetDistinctValues
  52       enabled_staff_search_views
  53       new_record_from_zebra
  54       z3950_search_args
  55       getIndexes
  56     );
  57 }
  58
  59 =head1 NAME
  60
  61 C4::Search - Functions for searching the Koha catalog.
  62
  63 =head1 SYNOPSIS
  64
  65 See opac/opac-search.pl or catalogue/search.pl for example of usage
  66
  67 =head1 DESCRIPTION
  68
  69 This module provides searching functions for Koha's bibliographic databases
  70
  71 =head1 FUNCTIONS
  72
  73 =cut
  74
  75 # make all your functions, whether exported or not;
  76
  77 =head2 FindDuplicate
  78
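( $biblionumber1, $title1, $biblionumber2, $title2, ... ) = FindDuplicate($record);

This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm.
It returns a flat list of biblionumber/title pairs, one pair per candidate record found.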
  82
  83 =cut
  84
  85 sub FindDuplicate {
  86     my ($record) = @_;
  87     my $dbh = C4::Context->dbh;
  88     my $result = TransformMarcToKoha( $record, '' );
  89     my $sth;
  90     my $query;
  91
  92     # search duplicate on ISBN, easy and fast..
  93     # ... normalize first
  94     if ( $result->{isbn} ) {
  95         $result->{isbn} =~ s/\(.*$//;
  96         $result->{isbn} =~ s/\s+$//;
  97         $query = "isbn:$result->{isbn}";
  98     }
  99     else {
 100
 101         my $titleindex = 'ti,ext';
 102         my $authorindex = 'au,ext';
 103         my $op = 'AND';
 104
 105         $result->{title} =~ s /\\//g;
 106         $result->{title} =~ s /\"//g;
 107         $result->{title} =~ s /\(//g;
 108         $result->{title} =~ s /\)//g;
 109
 110         $query = "$titleindex:\"$result->{title}\"";
 111         if   ( $result->{author} ) {
 112             $result->{author} =~ s /\\//g;
 113             $result->{author} =~ s /\"//g;
 114             $result->{author} =~ s /\(//g;
 115             $result->{author} =~ s /\)//g;
 116
 117             $query .= " $op $authorindex:\"$result->{author}\"";
 118         }
 119     }
 120
 121     my $searcher = Koha::SearchEngine::Search->new({index => $Koha::SearchEngine::BIBLIOS_INDEX});
 122     my ( $error, $searchresults, undef ) = $searcher->simple_search_compat($query,0,50);
 123     my @results;
 124     if (!defined $error) {
 125         foreach my $possible_duplicate_record (@{$searchresults}) {
 126             my $marcrecord = new_record_from_zebra(
 127                 'biblioserver',
 128                 $possible_duplicate_record
 129             );
 130
 131             my $result = TransformMarcToKoha( $marcrecord, '' );
 132
 133             # FIXME :: why 2 $biblionumber ?
 134             if ($result) {
 135                 push @results, $result->{'biblionumber'};
 136                 push @results, $result->{'title'};
 137             }
 138         }
 139     }
 140     return @results;
 141 }
 142
 143 =head2 SimpleSearch
 144
 145 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] );
 146
 147 This function provides a simple search API on the bibliographic catalog
 148
 149 =over 2
 150
 151 =item C<input arg:>
 152
 153     * $query can be a simple keyword or a complete CCL query
 154     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 155     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 156     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 157     * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = )
 158
 159
 160 =item C<Return:>
 161
 162     Returns an array consisting of three elements
 163     * $error is undefined unless an error is detected
 164     * $results is a reference to an array of records.
 165     * $total_hits is the number of hits that would have been returned with no limit
 166
 167     If an error is returned the two other return elements are undefined. If error itself is undefined
 168     the other two elements are always defined
 169
 170 =item C<usage in the script:>
 171
 172 =back
 173
 174 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 175
 176 if (defined $error) {
 177     $template->param(query_error => $error);
 178     warn "error: ".$error;
 179     output_html_with_http_headers $input, $cookie, $template->output;
 180     exit;
 181 }
 182
 183 my $hits = @{$marcresults};
 184 my @results;
 185
 186 for my $r ( @{$marcresults} ) {
 187     my $marcrecord = MARC::File::USMARC::decode($r);
 188     my $biblio = TransformMarcToKoha($marcrecord,q{});
 189
    # build the array of hashes for the template.
 191     push @results, {
 192         title           => $biblio->{'title'},
 193         subtitle        => $biblio->{'subtitle'},
 194         biblionumber    => $biblio->{'biblionumber'},
 195         author          => $biblio->{'author'},
 196         publishercode   => $biblio->{'publishercode'},
 197         publicationyear => $biblio->{'publicationyear'},
 198         };
 199
 200 }
 201
 202 $template->param(result=>\@results);
 203
 204 =cut
 205
 206 sub SimpleSearch {
 207     my ( $query, $offset, $max_results, $servers, %options )  = @_;
 208
 209     return ( 'No query entered', undef, undef ) unless $query;
 210     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 211     my @servers = defined ( $servers ) ? @$servers : ( 'biblioserver' );
 212     my @zoom_queries;
 213     my @tmpresults;
 214     my @zconns;
 215     my $results = [];
 216     my $total_hits = 0;
 217
 218     # Initialize & Search Zebra
 219     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 220         eval {
 221             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 222             $query =~ s/:/=/g unless $options{skip_normalize};
 223             $zoom_queries[$i] = ZOOM::Query::CCL2RPN->new( $query, $zconns[$i]);
 224             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 225
 226             # error handling
 227             my $error =
 228                 $zconns[$i]->errmsg() . " ("
 229               . $zconns[$i]->errcode() . ") "
 230               . $zconns[$i]->addinfo() . " "
 231               . $zconns[$i]->diagset();
 232
 233             return ( $error, undef, undef ) if $zconns[$i]->errcode();
 234         };
 235         if ($@) {
 236
 237             # caught a ZOOM::Exception
 238             my $error =
 239                 $@->message() . " ("
 240               . $@->code() . ") "
 241               . $@->addinfo() . " "
 242               . $@->diagset();
 243             warn $error." for query: $query";
 244             return ( $error, undef, undef );
 245         }
 246     }
 247
 248     _ZOOM_event_loop(
 249         \@zconns,
 250         \@tmpresults,
 251         sub {
 252             my ($i, $size) = @_;
 253             my $first_record = defined($offset) ? $offset + 1 : 1;
 254             my $hits = $tmpresults[ $i - 1 ]->size();
 255             $total_hits += $hits;
 256             my $last_record = $hits;
 257             if ( defined $max_results && $offset + $max_results < $hits ) {
 258                 $last_record = $offset + $max_results;
 259             }
 260
 261             for my $j ( $first_record .. $last_record ) {
 262                 my $record = eval {
 263                   $tmpresults[ $i - 1 ]->record( $j - 1 )->raw()
 264                   ;    # 0 indexed
 265                 };
 266                 push @{$results}, $record if defined $record;
 267             }
 268         }
 269     );
 270
 271     foreach my $zoom_query (@zoom_queries) {
 272         $zoom_query->destroy();
 273     }
 274
 275     return ( undef, $results, $total_hits );
 276 }
 277
 278 =head2 getRecords
 279
 280 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 281
 282         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 283         $results_per_page, $offset,       $branches,       $itemtypes,
 284         $query_type,       $scan,         $opac
 285     );
 286
The all singing, all dancing, multi-server, asynchronous, scanning,
searching, record nabbing, facet-building search function.
 289
 290 See verbose embedded documentation.
 291
 292 =cut
 293
 294 sub getRecords {
 295     my (
 296         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 297         $results_per_page, $offset,       $branches,         $itemtypes,
 298         $query_type,       $scan,         $opac
 299     ) = @_;
 300
 301     my @servers = @$servers_ref;
 302     my @sort_by = @$sort_by_ref;
 303     $offset = 0 if $offset < 0;
 304
 305     # Initialize variables for the ZOOM connection and results object
 306     my @zconns;
 307     my @results;
 308     my $results_hashref = ();
 309
 310     # TODO simplify this structure ( { branchcode => $branchname } is enought) and remove this parameter
 311     $branches ||= { map { $_->branchcode => { branchname => $_->branchname } } Koha::Libraries->search->as_list };
 312
 313     # Initialize variables for the faceted results objects
 314     my $facets_counter = {};
 315     my $facets_info    = {};
 316     my $facets         = getFacets();
 317
 318     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 319
 320     ### LOOP THROUGH THE SERVERS
 321     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 322         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 323
 324 # perform the search, create the results objects
 325 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 326         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 327
 328         Koha::Logger->get->debug($simple_query) if $scan;
 329
 330         # Check if we've got a query_type defined, if so, use it
 331         eval {
 332             if ($query_type) {
 333                 if ($query_type =~ /^ccl/) {
 334                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 335                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 336                 } elsif ($query_type =~ /^cql/) {
 337                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CQL->new($query_to_use, $zconns[$i]));
 338                 } elsif ($query_type =~ /^pqf/) {
 339                     $results[$i] = $zconns[$i]->search(ZOOM::Query::PQF->new($query_to_use, $zconns[$i]));
 340                 } else {
 341                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 342                 }
 343             } elsif ($scan) {
 344                     $results[$i] = $zconns[$i]->scan(  ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 345             } else {
 346                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 347             }
 348         };
 349         if ($@) {
 350             warn "WARNING: query problem with $query_to_use " . $@;
 351         }
 352
 353         # Concatenate the sort_by limits and pass them to the results object
 354         # Note: sort will override rank
 355         my $sort_by;
 356         foreach my $sort (@sort_by) {
 357             if ( $sort eq "author_az" || $sort eq "author_asc" ) {
 358                 $sort_by .= "1=1003 <i ";
 359             }
 360             elsif ( $sort eq "author_za" || $sort eq "author_dsc" ) {
 361                 $sort_by .= "1=1003 >i ";
 362             }
 363             elsif ( $sort eq "popularity_asc" ) {
 364                 $sort_by .= "1=9003 <i ";
 365             }
 366             elsif ( $sort eq "popularity_dsc" ) {
 367                 $sort_by .= "1=9003 >i ";
 368             }
 369             elsif ( $sort eq "call_number_asc" ) {
 370                 $sort_by .= "1=8007  <i ";
 371             }
 372             elsif ( $sort eq "call_number_dsc" ) {
 373                 $sort_by .= "1=8007 >i ";
 374             }
 375             elsif ( $sort eq "pubdate_asc" ) {
 376                 $sort_by .= "1=31 <i ";
 377             }
 378             elsif ( $sort eq "pubdate_dsc" ) {
 379                 $sort_by .= "1=31 >i ";
 380             }
 381             elsif ( $sort eq "acqdate_asc" ) {
 382                 $sort_by .= "1=32 <i ";
 383             }
 384             elsif ( $sort eq "acqdate_dsc" ) {
 385                 $sort_by .= "1=32 >i ";
 386             }
 387             elsif ( $sort eq "title_az" || $sort eq "title_asc" ) {
 388                 $sort_by .= "1=4 <i ";
 389             }
 390             elsif ( $sort eq "title_za" || $sort eq "title_dsc" ) {
 391                 $sort_by .= "1=4 >i ";
 392             }
 393             else {
 394                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 395             }
 396         }
 397         if ( $sort_by && !$scan && $results[$i] ) {
 398             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 399                 warn "WARNING sort $sort_by failed";
 400             }
 401         }
 402     }    # finished looping through servers
 403
 404     # The big moment: asynchronously retrieve results from all servers
 405         _ZOOM_event_loop(
 406             \@zconns,
 407             \@results,
 408             sub {
 409                 my ( $i, $size ) = @_;
 410                 my $results_hash;
 411
 412                 # loop through the results
 413                 $results_hash->{'hits'} = $size;
 414                 my $times;
 415                 if ( $offset + $results_per_page <= $size ) {
 416                     $times = $offset + $results_per_page;
 417                 }
 418                 else {
 419                     $times = $size;
 420                 }
 421
 422                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 423                     my $record;
 424
 425                     ## Check if it's an index scan
 426                     if ($scan) {
 427                         my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j);
 428
 429                  # here we create a minimal MARC record and hand it off to the
 430                  # template just like a normal result ... perhaps not ideal, but
 431                  # it works for now
 432                         my $tmprecord = MARC::Record->new();
 433                         $tmprecord->encoding('UTF-8');
 434                         my $tmptitle;
 435                         my $tmpauthor;
 436
 437                 # the minimal record in author/title (depending on MARC flavour)
 438                         if ( C4::Context->preference("marcflavour") eq
 439                             "UNIMARC" )
 440                         {
 441                             $tmptitle = MARC::Field->new(
 442                                 '200', ' ', ' ',
 443                                 a => $term,
 444                                 f => $occ
 445                             );
 446                             $tmprecord->append_fields($tmptitle);
 447                         }
 448                         else {
 449                             $tmptitle =
 450                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 451                             $tmpauthor =
 452                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 453                             $tmprecord->append_fields($tmptitle);
 454                             $tmprecord->append_fields($tmpauthor);
 455                         }
 456                         $results_hash->{'RECORDS'}[$j] =
 457                           $tmprecord->as_usmarc();
 458                     }
 459
 460                     # not an index scan
 461                     else {
 462                         $record = $results[ $i - 1 ]->record($j)->raw();
 463                         # warn "RECORD $j:".$record;
 464                         $results_hash->{'RECORDS'}[$j] = $record;
 465                     }
 466
 467                 }
 468                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 469
 470                 # Fill the facets while we're looping, but only for the
 471                 # biblioserver and not for a scan
 472                 if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
 473                     $facets_counter = GetFacets( $results[ $i - 1 ] );
 474                     $facets_info    = _get_facets_info( $facets );
 475                 }
 476
 477                 # BUILD FACETS
 478                 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 479                     for my $link_value (
 480                         sort { $a cmp $b } keys %$facets_counter
 481                       )
 482                     {
 483                         my @this_facets_array;
 484                         for my $one_facet (
 485                             sort {
 486                                 $facets_counter->{$link_value}
 487                                   ->{$b} <=> $facets_counter->{$link_value}
 488                                   ->{$a}
 489                             } keys %{ $facets_counter->{$link_value} }
 490                           )
 491                         {
 492 # Sanitize the link value : parenthesis, question and exclamation mark will cause errors with CCL
 493                             my $facet_link_value = $one_facet;
 494                             $facet_link_value =~ s/[()!?¡¿؟]/ /g;
 495
 496                             # fix the length that will display in the label,
 497                             my $facet_label_value = $one_facet;
 498                             my $facet_max_length  = C4::Context->preference(
 499                                 'FacetLabelTruncationLength')
 500                               || 20;
 501                             $facet_label_value =
 502                               substr( $one_facet, 0, $facet_max_length )
 503                               . "..."
 504                               if length($facet_label_value) >
 505                                   $facet_max_length;
 506
 507                         # if it's a branch, label by the name, not the code,
 508                             if ( $link_value =~ /branch/ ) {
 509                                 if (   defined $branches
 510                                     && ref($branches) eq "HASH"
 511                                     && defined $branches->{$one_facet}
 512                                     && ref( $branches->{$one_facet} ) eq
 513                                     "HASH" )
 514                                 {
 515                                     $facet_label_value =
 516                                       $branches->{$one_facet}
 517                                       ->{'branchname'};
 518                                 }
 519                                 else {
 520                                     $facet_label_value = "*";
 521                                 }
 522                             }
 523
 524                       # if it's a itemtype, label by the name, not the code,
 525                             if ( $link_value =~ /itype/ ) {
 526                                 if (   defined $itemtypes
 527                                     && ref($itemtypes) eq "HASH"
 528                                     && defined $itemtypes->{$one_facet}
 529                                     && ref( $itemtypes->{$one_facet} ) eq
 530                                     "HASH" )
 531                                 {
 532                                     $facet_label_value =
 533                                       $itemtypes->{$one_facet}
 534                                       ->{translated_description};
 535                                 }
 536                             }
 537
 538            # also, if it's a location code, use the name instead of the code
 539                             if ( $link_value =~ /location/ ) {
 540                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 541                                 my $av = Koha::AuthorisedValues->search({ category => 'LOC', authorised_value => $one_facet });
 542                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 543                             }
 544
 545                             # also, if it's a collection code, use the name instead of the code
 546                             if ( $link_value =~ /ccode/ ) {
 547                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 548                                 my $av = Koha::AuthorisedValues->search({ category => 'CCODE', authorised_value => $one_facet });
 549                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 550                             }
 551
 552             # but we're down with the whole label being in the link's title.
 553                             push @this_facets_array,
 554                               {
 555                                 facet_count =>
 556                                   $facets_counter->{$link_value}
 557                                   ->{$one_facet},
 558                                 facet_label_value => $facet_label_value,
 559                                 facet_title_value => $one_facet,
 560                                 facet_link_value  => $facet_link_value,
 561                                 type_link_value   => $link_value,
 562                               }
 563                               if ($facet_label_value);
 564                         }
 565
 566                         push @facets_loop,
 567                           {
 568                             type_link_value => $link_value,
 569                             type_id         => $link_value . "_id",
 570                             "type_label_"
 571                               . $facets_info->{$link_value}->{'label_value'} =>
 572                               1,
 573                             facets     => \@this_facets_array,
 574                           }
 575                           unless (
 576                             (
 577                                 $facets_info->{$link_value}->{'label_value'} =~
 578                                 /Libraries/
 579                             )
 580                             and ( Koha::Libraries->search->count == 1 )
 581                           );
 582                     }
 583                 }
 584             }
 585         );
 586
 587     # This sorts the facets into alphabetical order
 588     if (@facets_loop) {
 589         foreach my $f (@facets_loop) {
 590             if( C4::Context->preference('FacetOrder') eq 'Alphabetical' ){
 591                 $f->{facets} =
 592                     [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ];
 593             }
 594         }
 595     }
 596
 597     return ( undef, $results_hashref, \@facets_loop );
 598 }
 599
 600 sub GetFacets {
 601
 602     my $rs = shift;
 603     my $facets;
 604
 605     my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0;
 606
 607     if ( $use_zebra_facets ) {
 608         $facets = _get_facets_from_zebra( $rs );
 609     } else {
 610         $facets = _get_facets_from_records( $rs );
 611     }
 612
 613     return $facets;
 614 }
 615
 616 sub _get_facets_from_records {
 617
 618     my $rs = shift;
 619
 620     my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20;
 621     my $facets_config  = getFacets();
 622     my $facets         = {};
 623     my $size           = $rs->size();
 624     my $jmax           = $size > $facets_maxrecs
 625                             ? $facets_maxrecs
 626                             : $size;
 627
 628     for ( my $j = 0 ; $j < $jmax ; $j++ ) {
 629
 630         my $marc_record = new_record_from_zebra (
 631                 'biblioserver',
 632                 $rs->record( $j )->raw()
 633         );
 634
 635         if ( ! defined $marc_record ) {
 636             warn "ERROR DECODING RECORD - $@: " .
 637                 $rs->record( $j )->raw();
 638             next;
 639         }
 640
 641         _get_facets_data_from_record( $marc_record, $facets_config, $facets );
 642     }
 643
 644     return $facets;
 645 }
 646
 647 =head2 _get_facets_data_from_record
 648
 649     C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
 650
 651 Internal function that extracts facets information from a MARC::Record object
 652 and populates $facets_counter for using in getRecords.
 653
 654 $facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured
 655 facets for Zebra).
 656
 657 =cut
 658
 659 sub _get_facets_data_from_record {
 660
 661     my ( $marc_record, $facets, $facets_counter ) = @_;
 662
 663     for my $facet (@$facets) {
 664
 665         my @used_datas = ();
 666
 667         foreach my $tag ( @{ $facet->{ tags } } ) {
 668
 669             # tag number is the first three digits
 670             my $tag_num          = substr( $tag, 0, 3 );
 671             # subfields are the remainder
 672             my $subfield_letters = substr( $tag, 3 );
 673
 674             my @fields = $marc_record->field( $tag_num );
 675             foreach my $field (@fields) {
 676                 # If $field->indicator(1) eq 'z', it means it is a 'see from'
 677                 # field introduced because of IncludeSeeFromInSearches, so skip it
 678                 next if $field->indicator(1) eq 'z';
 679
 680                 my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
 681                 $data =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 682
 683                 unless ( grep { $_ eq $data } @used_datas ) {
 684                     push @used_datas, $data;
 685                     $facets_counter->{ $facet->{ idx } }->{ $data }++;
 686                 }
 687             }
 688         }
 689     }
 690 }
 691
 692 =head2 _get_facets_from_zebra
 693
 694     my $facets = _get_facets_from_zebra( $result_set )
 695
 696 Retrieves facets for a specified result set. It loops through the facets defined
 697 in C4::Koha::getFacets and returns a hash with the following structure:
 698
 699    {  facet_idx => {
 700             facet_value => count
 701       },
 702       ...
 703    }
 704
 705 =cut
 706
 707 sub _get_facets_from_zebra {
 708
 709     my $rs = shift;
 710
 711     # save current elementSetName
 712     my $elementSetName = $rs->option( 'elementSetName' );
 713
 714     my $facets_loop = getFacets();
 715     my $facets_data  = {};
 716     # loop through defined facets and fill the facets hashref
 717     foreach my $facet ( @$facets_loop ) {
 718
 719         my $idx = $facet->{ idx };
 720         my $sep = $facet->{ sep };
 721         my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep );
 722         if ( $facet_values ) {
 723             # we've actually got a result
 724             $facets_data->{ $idx } = $facet_values;
 725         }
 726     }
 727     # set elementSetName to its previous value to avoid side effects
 728     $rs->option( elementSetName => $elementSetName );
 729
 730     return $facets_data;
 731 }
 732
 733 =head2 _get_facet_from_result_set
 734
 735     my $facet_values =
 736         C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
 737
 738 Internal function that extracts facet information for a specific index ($facet_idx) and
 739 returns a hash containing facet values and count:
 740
 741     {
 742         $facet_value => $count ,
 743         ...
 744     }
 745
 746 Warning: this function has the side effect of changing the elementSetName for the result
 747 set. It is a helper function for the main loop, which takes care of backing it up for
 748 restoring.
 749
 750 =cut
 751
 752 sub _get_facet_from_result_set {
 753
 754     my $facet_idx = shift;
 755     my $rs        = shift;
 756     my $sep       = shift;
 757
 758     my $internal_sep  = '<*>';
 759     my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
 760
 761     return if ( ! defined $facet_idx || ! defined $rs );
 762     # zebra's facet element, untokenized index
 763     my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
 764     # configure zebra results for retrieving the desired facet
 765     $rs->option( elementSetName => $facet_element );
 766     # get the facet record from result set
 767     my $facet = $rs->record( 0 )->raw;
 768     # if the facet has no restuls...
 769     return if !defined $facet;
 770     # TODO: benchmark DOM vs. SAX performance
 771     my $facet_dom = XML::LibXML->load_xml(
 772       string => ($facet)
 773     );
 774     my @terms = $facet_dom->getElementsByTagName('term');
 775     return if ! @terms;
 776
 777     my $facets = {};
 778     foreach my $term ( @terms ) {
 779         my $facet_value = $term->textContent;
 780         $facet_value =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 781         $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
 782         $facets->{ $facet_value } += $term->getAttribute( 'occur' );
 783     }
 784
 785     return $facets;
 786 }
 787
 788 =head2 _get_facets_info
 789
 790     my $facets_info = C4::Search::_get_facets_info( $facets )
 791
 792 Internal function that extracts facets information and properly builds
 793 the data structure needed to render facet labels.
 794
 795 =cut
 796
 797 sub _get_facets_info {
 798
 799     my $facets = shift;
 800
 801     my $facets_info = {};
 802
 803     for my $facet ( @$facets ) {
 804         $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label };
 805     }
 806
 807     return $facets_info;
 808 }
 809
 810 # TRUNCATION
 811 sub _detect_truncation {
 812     my ( $operand, $index ) = @_;
 813     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 814         @regexpr );
 815     $operand =~ s/^ //g;
 816     my @wordlist = split( /\s/, $operand );
 817     foreach my $word (@wordlist) {
 818         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 819             push @rightlefttruncated, $word;
 820         }
 821         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 822             push @lefttruncated, $word;
 823         }
 824         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 825             push @righttruncated, $word;
 826         }
 827         elsif ( index( $word, "*" ) < 0 ) {
 828             push @nontruncated, $word;
 829         }
 830         else {
 831             push @regexpr, $word;
 832         }
 833     }
 834     return (
 835         \@nontruncated,       \@righttruncated, \@lefttruncated,
 836         \@rightlefttruncated, \@regexpr
 837     );
 838 }
 839
 840 # STEMMING
 841 sub _build_stemmed_operand {
 842     my ($operand,$lang) = @_;
 843     require Lingua::Stem::Snowball ;
 844     my $stemmed_operand=q{};
 845
 846     # Stemmer needs language
 847     return $operand unless $lang;
 848
 849     # If operand contains a digit, it is almost certainly an identifier, and should
 850     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 851     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 852     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 853     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 854     return $operand if $operand =~ /\d/;
 855
 856 # FIXME: the locale should be set based on the user's language and/or search choice
 857     #warn "$lang";
 858     # Make sure we only use the first two letters from the language code
 859     $lang = lc(substr($lang, 0, 2));
 860     # The language codes for the two variants of Norwegian will now be "nb" and "nn",
 861     # none of which Lingua::Stem::Snowball can use, so we need to "translate" them
 862     if ($lang eq 'nb' || $lang eq 'nn') {
 863       $lang = 'no';
 864     }
 865     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 866                                                encoding => "UTF-8" );
 867
 868     my @words = split( / /, $operand );
 869     my @stems = $stemmer->stem(\@words);
 870     for my $stem (@stems) {
 871         $stemmed_operand .= "$stem";
 872         $stemmed_operand .= "?"
 873           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 874         $stemmed_operand .= " ";
 875     }
 876
 877     Koha::Logger->get->debug("STEMMED OPERAND: $stemmed_operand");
 878     return $stemmed_operand;
 879 }
 880
 881 # FIELD WEIGHTING
 882 sub _build_weighted_query {
 883
 884 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 885 # pretty well but could work much better if we had a smarter query parser
 886     my ( $operand, $stemmed_operand, $index ) = @_;
 887     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 888     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 889     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 890     $operand =~ s/"/ /g;    # Bug 7518: searches with quotation marks don't work
 891
 892     my $weighted_query = "(rk=(";    # Specifies that we're applying rank
 893
 894     # Keyword, or, no index specified
 895     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 896         $weighted_query .=
 897           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 898         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 899         $weighted_query .= " or Title-cover,phr,r3=\"$operand\"";    # phrase title
 900         $weighted_query .= " or ti,wrdl,r4=\"$operand\"";    # words in title
 901           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 902           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 903         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 904           if $fuzzy_enabled;    # add fuzzy, word list
 905         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 906           if ( $stemming and $stemmed_operand )
 907           ;                     # add stemming, right truncation
 908         $weighted_query .= " or wrdl,r9=\"$operand\"";
 909
 910         # embedded sorting: 0 a-z; 1 z-a
 911         # $weighted_query .= ") or (sort1,aut=1";
 912     }
 913
 914     # Barcode searches should skip this process
 915     elsif ( $index eq 'bc' ) {
 916         $weighted_query .= "bc=\"$operand\"";
 917     }
 918
 919     # Authority-number searches should skip this process
 920     elsif ( $index eq 'an' ) {
 921         $weighted_query .= "an=\"$operand\"";
 922     }
 923
 924     # If the index is numeric, don't autoquote it.
 925     elsif ( $index =~ /,st-numeric$/ ) {
 926         $weighted_query .= " $index=$operand";
 927     }
 928
 929     # If the index already has more than one qualifier, wrap the operand
 930     # in quotes and pass it back (assumption is that the user knows what they
 931     # are doing and won't appreciate us mucking up their query
 932     elsif ( $index =~ ',' ) {
 933         $weighted_query .= " $index=\"$operand\"";
 934     }
 935
 936     #TODO: build better cases based on specific search indexes
 937     else {
 938         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 939           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 940         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 941         $weighted_query .= " or $index,wrdl,r6=\"$operand\"";    # word list index
 942         $weighted_query .= " or $index,wrdl,fuzzy,r8=\"$operand\""
 943           if $fuzzy_enabled;    # add fuzzy, word list
 944         $weighted_query .= " or $index,wrdl,rt,r9=\"$stemmed_operand\""
 945           if ( $stemming and $stemmed_operand );    # add stemming, right truncation
 946     }
 947
 948     $weighted_query .= "))";                       # close rank specification
 949     return $weighted_query;
 950 }
 951
 952 =head2 getIndexes
 953
 954 Return an array with available indexes.
 955
 956 =cut
 957
 958 sub getIndexes{
 959     my @indexes = (
 960                     # biblio indexes
 961                     'ab',
 962                     'Abstract',
 963                     'acqdate',
 964                     'allrecords',
 965                     'an',
 966                     'Any',
 967                     'at',
 968                     'arl',
 969                     'arp',
 970                     'au',
 971                     'aub',
 972                     'aud',
 973                     'audience',
 974                     'auo',
 975                     'aut',
 976                     'Author',
 977                     'Author-in-order ',
 978                     'Author-personal-bibliography',
 979                     'Authority-Number',
 980                     'authtype',
 981                     'bc',
 982                     'Bib-level',
 983                     'biblionumber',
 984                     'bio',
 985                     'biography',
 986                     'callnum',
 987                     'cfn',
 988                     'Chronological-subdivision',
 989                     'cn-bib-source',
 990                     'cn-bib-sort',
 991                     'cn-class',
 992                     'cn-item',
 993                     'cn-prefix',
 994                     'cn-suffix',
 995                     'cpn',
 996                     'Code-institution',
 997                     'Conference-name',
 998                     'Conference-name-heading',
 999                     'Conference-name-see',
1000                     'Conference-name-seealso',
1001                     'Content-type',
1002                     'Control-number',
1003                     'Control-number-identifier',
1004                     'cni',
1005                     'copydate',
1006                     'Corporate-name',
1007                     'Corporate-name-heading',
1008                     'Corporate-name-see',
1009                     'Corporate-name-seealso',
1010                     'Country-publication',
1011                     'ctype',
1012                     'curriculum',
1013                     'date-entered-on-file',
1014                     'Date-of-acquisition',
1015                     'Date-of-publication',
1016                     'Date-time-last-modified',
1017                     'Dewey-classification',
1018                     'Dissertation-information',
1019                     'diss',
1020                     'dtlm',
1021                     'EAN',
1022                     'extent',
1023                     'fic',
1024                     'fiction',
1025                     'Form-subdivision',
1026                     'format',
1027                     'Geographic-subdivision',
1028                     'he',
1029                     'Heading',
1030                     'Heading-use-main-or-added-entry',
1031                     'Heading-use-series-added-entry ',
1032                     'Heading-use-subject-added-entry',
1033                     'Host-item',
1034                     'id-other',
1035                     'ident',
1036                     'Identifier-standard',
1037                     'Illustration-code',
1038                     'Index-term-genre',
1039                     'Index-term-uncontrolled',
1040                     'Interest-age-level',
1041                     'Interest-grade-level',
1042                     'ISBN',
1043                     'isbn',
1044                     'ISSN',
1045                     'issn',
1046                     'itemtype',
1047                     'kw',
1048                     'Koha-Auth-Number',
1049                     'l-format',
1050                     'language',
1051                     'language-original',
1052                     'lc-card',
1053                     'LC-card-number',
1054                     'lcn',
1055                     'lex',
1056                     'lexile-number',
1057                     'llength',
1058                     'ln',
1059                     'ln-audio',
1060                     'ln-subtitle',
1061                     'Local-classification',
1062                     'Local-number',
1063                     'Match-heading',
1064                     'Match-heading-see-from',
1065                     'Material-type',
1066                     'mc-itemtype',
1067                     'mc-rtype',
1068                     'mus',
1069                     'Multipart-resource-level',
1070                     'mrl',
1071                     'name',
1072                     'Music-number',
1073                     'Name-geographic',
1074                     'Name-geographic-heading',
1075                     'Name-geographic-see',
1076                     'Name-geographic-seealso',
1077                     'nb',
1078                     'Note',
1079                     'notes',
1080                     'ns',
1081                     'nt',
1082                     'Other-control-number',
1083                     'pb',
1084                     'Personal-name',
1085                     'Personal-name-heading',
1086                     'Personal-name-see',
1087                     'Personal-name-seealso',
1088                     'pl',
1089                     'Place-publication',
1090                     'pn',
1091                     'popularity',
1092                     'pubdate',
1093                     'Publisher',
1094                     'Provider',
1095                     'pv',
1096                     'Reading-grade-level',
1097                     'Record-control-number',
1098                     'rcn',
1099                     'Record-type',
1100                     'rtype',
1101                     'se',
1102                     'See',
1103                     'See-also',
1104                     'sn',
1105                     'Stock-number',
1106                     'su',
1107                     'Subject',
1108                     'Subject-heading-thesaurus',
1109                     'Subject-name-personal',
1110                     'Subject-subdivision',
1111                     'Summary',
1112                     'Suppress',
1113                     'su-geo',
1114                     'su-na',
1115                     'su-to',
1116                     'su-ut',
1117                     'ut',
1118                     'Term-genre-form',
1119                     'Term-genre-form-heading',
1120                     'Term-genre-form-see',
1121                     'Term-genre-form-seealso',
1122                     'ti',
1123                     'Title',
1124                     'Title-cover',
1125                     'Title-series',
1126                     'Title-uniform',
1127                     'Title-uniform-heading',
1128                     'Title-uniform-see',
1129                     'Title-uniform-seealso',
1130                     'totalissues',
1131                     'yr',
1132
1133                     # items indexes
1134                     'acqsource',
1135                     'barcode',
1136                     'bc',
1137                     'branch',
1138                     'ccode',
1139                     'classification-source',
1140                     'cn-sort',
1141                     'coded-location-qualifier',
1142                     'copynumber',
1143                     'damaged',
1144                     'datelastborrowed',
1145                     'datelastseen',
1146                     'holdingbranch',
1147                     'homebranch',
1148                     'issues',
1149                     'item',
1150                     'itemnumber',
1151                     'itype',
1152                     'Local-classification',
1153                     'location',
1154                     'lost',
1155                     'materials-specified',
1156                     'mc-ccode',
1157                     'mc-itype',
1158                     'mc-loc',
1159                     'notforloan',
1160                     'Number-local-acquisition',
1161                     'onloan',
1162                     'price',
1163                     'renewals',
1164                     'replacementprice',
1165                     'replacementpricedate',
1166                     'reserves',
1167                     'restricted',
1168                     'stack',
1169                     'stocknumber',
1170                     'inv',
1171                     'uri',
1172                     'withdrawn',
1173
1174                     # subject related
1175                   );
1176
1177     return \@indexes;
1178 }
1179
1180 =head2 buildQuery
1181
1182 ( $error, $query,
1183 $simple_query, $query_cgi,
1184 $query_desc, $limit,
1185 $limit_cgi, $limit_desc,
1186 $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
1187
1188 Build queries and limits in CCL, CGI, Human,
1189 handle truncation, stemming, field weighting, fuzziness, etc.
1190
1191 See verbose embedded documentation.
1192
1193
1194 =cut
1195
1196 sub buildQuery {
1197     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
1198
1199     my $query_desc;
1200
1201     # dereference
1202     my @operators = $operators ? @$operators : ();
1203     my @indexes   = $indexes   ? @$indexes   : ();
1204     my @operands  = $operands  ? @$operands  : ();
1205     my @limits    = $limits    ? @$limits    : ();
1206     my @sort_by   = $sort_by   ? @$sort_by   : ();
1207
1208     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
1209     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
1210     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
1211     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
1212
1213     my $query        = $operands[0] // "";
1214     my $simple_query = $operands[0];
1215
1216     # initialize the variables we're passing back
1217     my $query_cgi;
1218     my $query_type;
1219
1220     my $limit;
1221     my $limit_cgi;
1222     my $limit_desc;
1223
1224     my $cclq       = 0;
1225     my $cclindexes = getIndexes();
1226     if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
1227         while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) {
1228             my $dx = lc($1);
1229             $cclq = grep { lc($_) eq $dx } @$cclindexes;
1230         }
1231         $query = "ccl=$query" if $cclq;
1232     }
1233
1234 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1235 # DIAGNOSTIC ONLY!!
1236     if ( $query =~ /^ccl=/ ) {
1237         my $q=$';
1238         # This is needed otherwise ccl= and &limit won't work together, and
1239         # this happens when selecting a subject on the opac-detail page
1240         @limits = grep {!/^$/} @limits;
1241         my $original_q = $q; # without available part
1242         unless ( grep { $_ eq 'available' } @limits ) {
1243             $q =~ s| and \( \(allrecords,AlwaysMatches=''\) and \(not-onloan-count,st-numeric >= 1\) and \(lost,st-numeric=0\) \)||;
1244             $original_q = $q;
1245         }
1246         if ( @limits ) {
1247             if ( grep { $_ eq 'available' } @limits ) {
1248                 $q .= q| and ( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )|;
1249                 @limits = grep {!/^available$/} @limits;
1250             }
1251             $q .= ' and '.join(' and ', @limits) if @limits;
1252         }
1253         return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $original_q, '', '', '', 'ccl' );
1254     }
1255     if ( $query =~ /^cql=/ ) {
1256         return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' );
1257     }
1258     if ( $query =~ /^pqf=/ ) {
1259         $query_desc = $';
1260         $query_cgi = "q=pqf=".uri_escape_utf8($');
1261         return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' );
1262     }
1263
1264     # pass nested queries directly
1265     # FIXME: need better handling of some of these variables in this case
1266     # Nested queries aren't handled well and this implementation is flawed and causes users to be
1267     # unable to search for anything containing () commenting out, will be rewritten for 3.4.0
1268 #    if ( $query =~ /(\(|\))/ ) {
1269 #        return (
1270 #            undef,              $query, $simple_query, $query_cgi,
1271 #            $query,             $limit, $limit_cgi,    $limit_desc,
1272 #            'ccl'
1273 #        );
1274 #    }
1275
1276 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1277 # query operands and indexes and add stemming, truncation, field weighting, etc.
1278 # Once we do so, we'll end up with a value in $query, just like if we had an
1279 # incoming $query from the user
1280     else {
1281         $query = ""
1282           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1283         my $previous_operand
1284           ;    # a flag used to keep track if there was a previous query
1285                # if there was, we can apply the current operator
1286                # for every operand
1287         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1288
1289             # COMBINE OPERANDS, INDEXES AND OPERATORS
1290             if ( ($operands[$i] // '') ne '' ) {
1291                 $operands[$i]=~s/^\s+//;
1292
1293               # A flag to determine whether or not to add the index to the query
1294                 my $indexes_set;
1295
1296 # If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
1297                 if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
1298                     $weight_fields    = 0;
1299                     $stemming         = 0;
1300                 } else {
1301                     $operands[$i] =~ s/\?/{?}/g; # need to escape question marks
1302                 }
1303                 my $operand = $operands[$i];
1304                 my $index   = $indexes[$i] || 'kw';
1305
1306                 # Add index-specific attributes
1307
1308                 #Afaik, this 'yr' condition will only ever be met in the staff interface advanced search
1309                 #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
1310                 #which is processed higher up in this sub. Other than that, year searches are typically
                #handled as limits which are not processed here either.
1312
1313                 # Search ranges: Date of Publication, st-numeric
1314                 if ( $index =~ /(yr|st-numeric)/ ) {
1315                     #weight_fields/relevance search causes errors with date ranges
1316                     #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
1317                     #In the case of YYYY-YYYY, it will return no results
1318                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1319                 }
1320
1321                 # Date of Acquisition
1322                 elsif ( $index =~ /acqdate/ ) {
1323                     #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
1324                     #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
1325                       #top of the results just because they have lots of item records matching that date.
1326                     #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
1327                       #irrelevant here
1328                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1329                 }
1330                 # ISBN,ISSN,Standard Number, don't need special treatment
1331                 elsif ( $index eq 'nb' || $index eq 'ns' || $index eq 'hi' ) {
1332                     (
1333                         $stemming,      $auto_truncation,
1334                         $weight_fields, $fuzzy_enabled
1335                     ) = ( 0, 0, 0, 0 );
1336
1337                     if ( $index eq 'nb' ) {
1338                         if ( C4::Context->preference("SearchWithISBNVariations") ) {
1339                             my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
1340                             $operands[$i] = $operand =  '(nb=' . join(' OR nb=', @isbns) . ')';
1341                             $indexes[$i] = $index = 'kw';
1342                         }
1343                     }
1344                 }
1345
1346                 # Set default structure attribute (word list)
1347                 my $struct_attr = q{};
1348                 unless ( $indexes_set || $index =~ /,(st-|phr|ext|wrdl)/ || $index =~ /^(nb|ns)$/ ) {
1349                     $struct_attr = ",wrdl";
1350                 }
1351
1352                 # Some helpful index variants
1353                 my $index_plus       = $index . $struct_attr . ':';
1354                 my $index_plus_comma = $index . $struct_attr . ',';
1355
1356                 if ($auto_truncation){
1357                         unless ( $index =~ /,(st-|phr|ext)/ ) {
1358                                                 #FIXME only valid with LTR scripts
1359                                                 $operand=join(" ",map{
1360                                                                                         (index($_,"*")>0?"$_":"$_*")
1361                                                                                          }split (/\s+/,$operand));
1362                                         }
1363                                 }
1364
1365                 # Detect Truncation
1366                 my $truncated_operand = q{};
1367                 my( $nontruncated, $righttruncated, $lefttruncated,
1368                     $rightlefttruncated, $regexpr
1369                 ) = _detect_truncation( $operand, $index );
1370
1371                 Koha::Logger->get->debug(
1372                     "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<");
1373
1374                 # Apply Truncation
1375                 if (
1376                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1377                     scalar(@$rightlefttruncated) > 0 )
1378                 {
1379
1380                # Don't field weight or add the index to the query, we do it here
1381                     $indexes_set = 1;
1382                     undef $weight_fields;
1383                     my $previous_truncation_operand;
1384                     if (scalar @$nontruncated) {
1385                         $truncated_operand .= "$index_plus @$nontruncated ";
1386                         $previous_truncation_operand = 1;
1387                     }
1388                     if (scalar @$righttruncated) {
1389                         $truncated_operand .= "and " if $previous_truncation_operand;
1390                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1391                         $previous_truncation_operand = 1;
1392                     }
1393                     if (scalar @$lefttruncated) {
1394                         $truncated_operand .= "and " if $previous_truncation_operand;
1395                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1396                         $previous_truncation_operand = 1;
1397                     }
1398                     if (scalar @$rightlefttruncated) {
1399                         $truncated_operand .= "and " if $previous_truncation_operand;
1400                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1401                         $previous_truncation_operand = 1;
1402                     }
1403                 }
1404                 $operand = $truncated_operand if $truncated_operand;
1405                 Koha::Logger->get->debug("TRUNCATED OPERAND: >$truncated_operand<");
1406
1407                 # Handle Stemming
1408                 my $stemmed_operand = q{};
1409                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1410                                                                                 if $stemming;
1411
1412                 Koha::Logger->get->debug("STEMMED OPERAND: >$stemmed_operand<");
1413
1414                 # Handle Field Weighting
1415                 my $weighted_operand = q{};
1416                 if ($weight_fields) {
1417                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1418                     $operand = $weighted_operand;
1419                     $indexes_set = 1;
1420                 }
1421
1422                 Koha::Logger->get->debug("FIELD WEIGHTED OPERAND: >$weighted_operand<");
1423
1424                 #Use relevance ranking when not using a weighted query (which adds relevance ranking of its own)
1425
1426                 #N.B. Truncation is mutually exclusive with Weighted Queries,
1427                 #so even if QueryWeightFields is turned on, QueryAutoTruncate will turn it off, thus
1428                 #the need for this relevance wrapper.
1429                 $operand = "(rk=($operand))" unless $weight_fields;
1430
1431                 ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({
1432                     query => $query,
1433                     query_cgi => $query_cgi,
1434                     query_desc => $query_desc,
1435                     operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '',
1436                     parsed_operand => $operand,
1437                     original_operand => $operands[$i] // '',
1438                     index => $index,
1439                     index_plus => $index_plus,
1440                     indexes_set => $indexes_set,
1441                     previous_operand => $previous_operand,
1442                 });
1443
1444             }    #/if $operands
1445         }    # /for
1446     }
1447     Koha::Logger->get->debug("QUERY BEFORE LIMITS: >$query<");
1448
1449     # add limits
1450     my %group_OR_limits;
1451     my $availability_limit;
1452     foreach my $this_limit (@limits) {
1453         next unless $this_limit;
1454         if ( $this_limit =~ /available/ ) {
1455 #
1456 ## 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1457 ## In English:
1458 ## all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1459             $availability_limit .=
1460 "( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )";
1461             $limit_cgi  .= "&limit=available";
1462             $limit_desc .= "";
1463         }
1464
1465         # group_OR_limits, prefixed by mc-
1466         # OR every member of the group
1467         elsif ( $this_limit =~ /mc/ ) {
1468             my ($k,$v) = split(/:/, $this_limit,2);
1469             if ( $k !~ /mc-i(tem)?type/ ) {
1470                 # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
1471                 $this_limit =~ tr/"//d;
1472                 $this_limit = $k.':"'.$v.'"';
1473             }
1474
1475             $group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
1476             $limit_desc      .= " or " if $group_OR_limits{$k};
1477             $group_OR_limits{$k} .= "$this_limit";
1478             $limit_cgi       .= "&limit=" . uri_escape_utf8($this_limit);
1479             $limit_desc      .= " $this_limit";
1480         }
1481         elsif ( $this_limit =~ '^multibranchlimit:|^branch:' ) {
1482             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1483             $limit .= " and " if $limit || $query;
1484             my $branchfield  = C4::Context->preference('SearchLimitLibrary');
1485             my @branchcodes;
1486             if(  $this_limit =~ '^multibranchlimit:' ){
1487                 my ($group_id) = ( $this_limit =~ /^multibranchlimit:(.*)$/ );
1488                 my $search_group = Koha::Library::Groups->find( $group_id );
1489                 @branchcodes  = map { $_->branchcode } $search_group->all_libraries;
1490                 @branchcodes = sort { $a cmp $b } @branchcodes;
1491             } else {
1492                 @branchcodes = ( $this_limit =~ /^branch:(.*)$/ );
1493             }
1494
1495             if (@branchcodes) {
1496                 if ( $branchfield eq "homebranch" ) {
1497                     $this_limit = sprintf "(%s)", join " or ", map { 'homebranch: ' . $_ } @branchcodes;
1498                 }
1499                 elsif ( $branchfield eq "holdingbranch" ) {
1500                     $this_limit = sprintf "(%s)", join " or ", map { 'holdingbranch: ' . $_ } @branchcodes;
1501                 }
1502                 else {
1503                     $this_limit =  sprintf "(%s or %s)",
1504                       join( " or ", map { 'homebranch: ' . $_ } @branchcodes ),
1505                       join( " or ", map { 'holdingbranch: ' . $_ } @branchcodes );
1506                 }
1507             }
1508             $limit .= "$this_limit";
1509             $limit_desc .= " $this_limit";
1510         }
1511
1512         # Regular old limits
1513         else {
1514             $limit .= " and " if $limit || $query;
1515             $limit      .= "$this_limit";
1516             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1517             $limit_desc .= " $this_limit";
1518         }
1519     }
1520     foreach my $k (keys (%group_OR_limits)) {
1521         $limit .= " and " if ( $query || $limit );
1522         $limit .= "($group_OR_limits{$k})";
1523     }
1524     if ($availability_limit) {
1525         $limit .= " and " if ( $query || $limit );
1526         $limit .= "($availability_limit)";
1527     }
1528
1529     # Normalize the query and limit strings
1530     # This is flawed , means we can't search anything with : in it
1531     # if user wants to do ccl or cql, start the query with that
1532 #    $query =~ s/:/=/g;
1533     #NOTE: We use several several different regexps here as you can't have variable length lookback assertions
1534     $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
1535     $query =~ s/(?<=(wrdl)):/=/g;
1536     $query =~ s/(?<=(trn|phr)):/=/g;
1537     $query =~ s/(?<=(st-numeric)):/=/g;
1538     $query =~ s/(?<=(st-year)):/=/g;
1539     $query =~ s/(?<=(st-date-normalized)):/=/g;
1540
1541     # Removing warnings for later substitutions
1542     $query        //= q{};
1543     $query_desc   //= q{};
1544     $query_cgi    //= q{};
1545     $limit        //= q{};
1546     $limit_desc   //= q{};
1547     $limit_cgi    //= q{};
1548     $simple_query //= q{};
1549     $limit =~ s/:/=/g;
1550     for ( $query, $query_desc, $limit, $limit_desc ) {
1551         s/  +/ /g;    # remove extra spaces
1552         s/^ //g;     # remove any beginning spaces
1553         s/ $//g;     # remove any ending spaces
1554         s/==/=/g;    # remove double == from query
1555     }
1556     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1557
1558     for ($query_cgi,$simple_query) {
1559         s/"//g;
1560     }
1561     # append the limit to the query
1562     $query .= " " . $limit;
1563
1564     Koha::Logger->get->debug(
1565         sprintf "buildQuery returns\nQUERY:%s\nQUERY CGI:%s\nQUERY DESC:%s\nLIMIT:%s\nLIMIT CGI:%s\nLIMIT DESC:%s",
1566         $query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc );
1567
1568     return (
1569         undef,              $query, $simple_query, $query_cgi,
1570         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1571         $query_type
1572     );
1573 }
1574
=head2 _build_initial_query

  ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params);

  Build a section of the initial query containing indexes, operators, and operands.

  $initial_query_params is a hashref. The keys read here are:

    query, query_cgi, query_desc - strings built so far; this section is appended to them
    previous_operand             - true if an operand has already been added to the query
    operator                     - operator joining this section to the previous one
                                   (defaults to 'AND' when there is a previous operand)
    indexes_set                  - true if parsed_operand already carries its index(es)
    index_plus                   - index prefix to put in front of parsed_operand, e.g. "kw,wrdl:"
    index                        - index name as submitted, used for the "idx" CGI parameter
    parsed_operand               - operand as it should appear in the query
    original_operand             - operand as typed by the user, used for the "q" CGI
                                   parameter and the description

  The hashref is updated in place; the four updated values are also returned.

=cut

sub _build_initial_query {
    my ($params) = @_;

    # An operator is only emitted between two operands, never before the first one
    my $operator = q{};
    if ( $params->{previous_operand} ) {
        $operator = $params->{operator} || 'AND';
    }

    # Missing values are treated as empty strings so that concatenation and
    # comparison below do not raise "uninitialized value" warnings
    my $index_plus       = $params->{index_plus}       // q{};
    my $parsed_operand   = $params->{parsed_operand}   // q{};
    my $original_operand = $params->{original_operand} // q{};

    #NOTE: indexes_set is typically set when doing truncation or field weighting
    my $operand = $params->{indexes_set} ? $parsed_operand : $index_plus . $parsed_operand;

    #e.g. "kw,wrdl:test"
    #e.g. " and kw,wrdl:test"
    $params->{query} .= " " . $operator . " " . $operand;

    $params->{query_cgi} .= "&op=" . uri_escape_utf8($operator)          if $operator;
    $params->{query_cgi} .= "&idx=" . uri_escape_utf8( $params->{index} ) if $params->{index};
    $params->{query_cgi} .= "&q=" . uri_escape_utf8($original_operand)   if $original_operand ne '';

    #e.g. " and kw,wrdl: test"
    $params->{query_desc} .= " " . $operator . " " . $index_plus . " " . $original_operand;

    # If there was no previous operand, this one becomes it for the next section
    $params->{previous_operand} = 1 unless $params->{previous_operand};

    return ( $params->{query}, $params->{query_cgi}, $params->{query_desc}, $params->{previous_operand} );
}
1610
=head2 searchResults

  my @search_results = searchResults($search_context, $searchdesc, $hits,
                                     $results_per_page, $offset, $scan,
                                     $marcresults, $xslt_variables);

Format results in a form suitable for passing to the template

=over 4

=item C<$search_context>

Hashref. C<interface> is forced to 'opac' unless it is 'intranet' (the value is
written back into the caller's hashref). C<category> is handed to
C4::Items::GetHiddenItemnumbers as the borrower category.

=item C<$searchdesc>

Not used by this function.

=item C<$hits>, C<$results_per_page>, C<$offset>

Records from position C<$offset> up to (but not including)
C<$offset + $results_per_page> are processed, or up to C<$hits> if fewer
records are available.

=item C<$scan>

True if the records come from a scan search: they are then parsed as USMARC,
no framework code is looked up and no XSLT rendering is done.

=item C<$marcresults>

Arrayref of raw records, indexed by absolute result position.

=item C<$xslt_variables>

Passed through to XSLTParse4Display.

=back

Returns a list of hashrefs, one per displayed record: the output of
TransformMarcToKoha plus item status counts, the available/onloan/other item
loops, normalized identifiers, the XSLT rendering and the Koha::Biblio object.
Records that cannot be decoded are skipped with a warning; records whose items
are all hidden are skipped when OpacHiddenItemsHidesRecord is set.

=cut

# IMO this subroutine is pretty messy still -- it's responsible for
# building the HTML output for the template
sub searchResults {
    my ( $search_context, $searchdesc, $hits, $results_per_page, $offset, $scan, $marcresults, $xslt_variables ) = @_;
    my @newresults;

    require C4::Items;

    # Anything that is not explicitly the staff interface is treated as OPAC
    $search_context->{'interface'} = 'opac' if !$search_context->{'interface'} || $search_context->{'interface'} ne 'intranet';
    my ($is_opac, $hidelostitems);
    if ($search_context->{'interface'} eq 'opac') {
        $hidelostitems = C4::Context->preference('hidelostitems');
        $is_opac       = 1;
    }

    my $record_processor = Koha::RecordProcessor->new({
        filters => 'ViewPolicy'
    });

    #Build branchnames hash
    my %branches = map { $_->branchcode => $_->branchname } Koha::Libraries->search({}, { order_by => 'branchname' })->as_list;

    # FIXME - We build an authorised values hash here, using the default framework
    # though it is possible to have different authvals for different fws.

    my $shelflocations =
      { map { $_->{authorised_value} => $_->{lib} } Koha::AuthorisedValues->get_descriptions_by_koha_field( { frameworkcode => '', kohafield => 'items.location' } ) };

    # get notforloan authorised value list (see $shelflocations  FIXME)
    my $av = Koha::MarcSubfieldStructures->search({ frameworkcode => '', kohafield => 'items.notforloan', authorised_value => [ -and => {'!=' => undef }, {'!=' => ''}] });
    my $notforloan_authorised_value = $av->count ? $av->next->authorised_value : undef;

    #Get itemtype hash
    my $itemtypes = Koha::ItemTypes->search_with_localization;
    my %itemtypes = map { $_->{itemtype} => $_ } @{ $itemtypes->unblessed };

    #search item field code
    my ($itemtag, undef) = GetMarcFromKohaField( "items.itemnumber" );

    ## find column names of items related to MARC
    my %subfieldstosearch;
    my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns;
    for my $column ( @columns ) {
        my ( undef, $tagsubfield ) = GetMarcFromKohaField( "items." . $column );
        if ( defined $tagsubfield ) {
            $subfieldstosearch{$column} = $tagsubfield;
        }
    }

    # handle which records to actually retrieve
    my $times; # Times is which record to process up to
    if ( $hits && $offset + $results_per_page <= $hits ) {
        $times = $offset + $results_per_page;
    }
    else {
        $times = $hits; # If less hits than results_per_page+offset we go to the end
    }

    my $marcflavour = C4::Context->preference("marcflavour");
    # We get the biblionumber position in MARC
    my ($bibliotag,$bibliosubf)=GetMarcFromKohaField( 'biblio.biblionumber' );

    # System preferences needed for every item: read them once, not per item
    my $item_level_itypes = C4::Context->preference("item-level_itypes");
    my $use_recalls       = C4::Context->preference('UseRecalls');
    my $max_status_check  = C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck');
    my $hbranch           = C4::Context->preference('StaffSearchResultsDisplayBranch');
    my $otherbranch       = ( $hbranch // q{} ) eq 'homebranch' ? 'holdingbranch' : 'homebranch';

    my $userenv = C4::Context->userenv;
    my $logged_in_user
        = ( defined $userenv and $userenv->{number} )
        ? Koha::Patrons->find( $userenv->{number} )
        : undef;
    my $patron_category_hide_lost_items = ($logged_in_user) ? $logged_in_user->category->hidelostitems : 0;

    # loop through all of the records we've retrieved
    for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {

        my $marcrecord;
        if ($scan) {
            # For Scan searches we built USMARC data
            $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i]);
        } else {
            # Normal search, render from Zebra's output
            $marcrecord = new_record_from_zebra(
                'biblioserver',
                $marcresults->[$i]
            );

            if ( ! defined $marcrecord ) {
                warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i];
                next;
            }
        }

        # Control fields (tag < 010) carry the biblionumber as field data,
        # other fields carry it in a subfield
        my $fw = $scan
             ? undef
             : $bibliotag < 10
               ? GetFrameworkCode($marcrecord->field($bibliotag)->data)
               : GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));

        SetUTF8Flag($marcrecord);
        my $oldbiblio = TransformMarcToKoha( $marcrecord, $fw, 'no_items' );
        $oldbiblio->{result_number} = $i + 1;

        $oldbiblio->{normalized_upc}  = GetNormalizedUPC(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_ean}  = GetNormalizedEAN(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
        $oldbiblio->{normalized_isbn} = GetNormalizedISBN($oldbiblio->{isbn},$marcrecord,$marcflavour); # Use existing ISBN from record if we got one
        $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});

        # edition information, if any
        $oldbiblio->{edition} = $oldbiblio->{editionstatement};

        my $itemtype = $oldbiblio->{itemtype} ? $itemtypes{$oldbiblio->{itemtype}} : undef;
        # add imageurl to itemtype if there is one
        $oldbiblio->{imageurl} = $itemtype ? getitemtypeimagelocation( $search_context->{'interface'}, $itemtype->{imageurl} ) : q{};
        # Build summary if there is one (the summary is defined in the itemtypes table)
        $oldbiblio->{description} = $itemtype ? $itemtype->{translated_description} : q{};

        # Pull out the items fields
        my @fields = $marcrecord->field($itemtag);
        $marcrecord->delete_fields( @fields ) unless C4::Context->preference('PassItemMarcToXSLT');

        # adding linked items that belong to host records
        if ( C4::Context->preference('EasyAnalyticalRecords') ) {
            my $analyticsfield = '773';
            if ($marcflavour eq 'MARC21') {
                $analyticsfield = '773';
            } elsif ($marcflavour eq 'UNIMARC') {
                $analyticsfield = '461';
            }
            foreach my $hostfield ( $marcrecord->field($analyticsfield)) {
                my $hostbiblionumber = $hostfield->subfield("0");
                my $linkeditemnumber = $hostfield->subfield("9");
                if( $hostbiblionumber ) {
                    my $linkeditemmarc = C4::Items::GetMarcItem( $hostbiblionumber, $linkeditemnumber );
                    if ($linkeditemmarc) {
                        my $linkeditemfield = $linkeditemmarc->field($itemtag);
                        if ($linkeditemfield) {
                            push( @fields, $linkeditemfield );
                        }
                    }
                }
            }
        }

        # Setting item statuses for display
        my @available_items_loop;
        my @onloan_items_loop;
        my @other_items_loop;

        my $available_items;
        my $onloan_items;
        my $other_items;

        my $ordered_count         = 0;
        my $available_count       = 0;
        my $onloan_count          = 0;
        my $longoverdue_count     = 0;
        my $other_count           = 0;
        my $withdrawn_count       = 0;
        my $itemlost_count        = 0;
        my $hideatopac_count      = 0;
        my $itembinding_count     = 0;
        my $itemdamaged_count     = 0;
        my $item_in_transit_count = 0;
        my $item_onhold_count     = 0;
        my $notforloan_count      = 0;
        my $item_recalled_count   = 0;
        my $items_count           = scalar(@fields);
        my $maxitems_pref = C4::Context->preference('maxItemsinSearchResults');
        my $maxitems = $maxitems_pref ? $maxitems_pref - 1 : 1;
        my @hiddenitems; # hidden itemnumbers based on OpacHiddenItems syspref

        # loop through every item
        foreach my $field (@fields) {
            my $item;

            # populate the items hash
            foreach my $code ( keys %subfieldstosearch ) {
                $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
            }
            $item->{description} = $itemtypes{ $item->{itype} }{translated_description} if $item->{itype};

            # OPAC hidden items
            if ($is_opac) {
                # hidden because lost
                if ($hidelostitems && $item->{itemlost}) {
                    push @hiddenitems, $item->{itemnumber};
                    $hideatopac_count++;
                    next;
                }
                # hidden based on OpacHiddenItems syspref
                my @hi = C4::Items::GetHiddenItemnumbers({ items=> [ $item ], borcat => $search_context->{category} });
                if (scalar @hi) {
                    push @hiddenitems, @hi;
                    $hideatopac_count++;
                    next;
                }
            }

            # set item's branch name, use the StaffSearchResultsDisplayBranch
            # syspref first, fall back to the other branch
            if ($item->{$hbranch}) {
                $item->{'branchname'} = $branches{$item->{$hbranch}};
            }
            elsif ($item->{$otherbranch}) {     # Last resort
                $item->{'branchname'} = $branches{$item->{$otherbranch}};
            }

            my $prefix =
                ( $item->{$hbranch} ? $item->{$hbranch} . '--' : q{} )
              . ( $item->{location} ? $item->{location} : q{} )
              . ( $item->{itype}    ? $item->{itype}    : q{} )
              . ( $item->{itemcallnumber} ? $item->{itemcallnumber} : q{} );
            # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
            if ( $item->{onloan}
                and $logged_in_user
                and !( $patron_category_hide_lost_items and $item->{itemlost} ) )
            {
                $onloan_count++;
                # barcode may be missing; do not warn when building the key
                my $key = $prefix . $item->{onloan} . ( $item->{barcode} // q{} );
                $onloan_items->{$key}->{due_date} = $item->{onloan};
                $onloan_items->{$key}->{count}++ if $item->{$hbranch};
                $onloan_items->{$key}->{branchname}     = $item->{branchname};
                $onloan_items->{$key}->{location}       = $shelflocations->{ $item->{location} } if $item->{location};
                $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
                $onloan_items->{$key}->{description}    = $item->{description};
                $onloan_items->{$key}->{imageurl} =
                  getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype} // q{} }->{imageurl} );

                # if something's checked out and lost, mark it as 'long overdue'
                if ( $item->{itemlost} ) {
                    $onloan_items->{$key}->{longoverdue}++;
                    $longoverdue_count++;
                }
            }

            # items not on loan, but still unavailable ( lost, withdrawn, damaged )
            else {

                # Item type code used for the not-for-loan check: item level
                # or biblio level depending on the item-level_itypes syspref
                my $itemtype_code = $item_level_itypes ? $item->{itype} : $oldbiblio->{itemtype};
                $item->{notforloan} = 1 if !$item->{notforloan} &&
                    $itemtype_code && $itemtypes{ $itemtype_code }->{notforloan};

                # item is on order (negative notforloan value)
                my $notforloan_value = $item->{notforloan} || 0;
                if ( $notforloan_value < 0 ) {
                    $ordered_count++;
                } elsif ( $notforloan_value > 0 ) {
                    $notforloan_count++;
                }

                # is item in transit?
                my $transfertwhen = '';
                my ($transfertfrom, $transfertto);

                # is item on the reserve shelf?
                my $reservestatus = '';

                # is item a waiting recall?
                my $recallstatus = '';

                unless ($item->{withdrawn}
                        || $item->{itemlost}
                        || $item->{damaged}
                        || $item->{notforloan}
                        || ( $max_status_check && $items_count > $max_status_check ) ) {

                    # A couple heuristics to limit how many times
                    # we query the database for item transfer information, sacrificing
                    # accuracy in some cases for speed;
                    #
                    # 1. don't query if item has one of the other statuses
                    # 2. don't check transit status if the bib has more items
                    #    than MaxSearchResultsItemsPerRecordStatusCheck
                    #
                    # FIXME: to avoid having the query the database like this, and to make
                    #        the in transit status count as unavailable for search limiting,
                    #        should map transit status to record indexed in Zebra.
                    #
                    ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
                    $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} );
                    if ($use_recalls) {
                        if ( Koha::Recalls->search({ item_id => $item->{itemnumber}, status => 'waiting' })->count ) {
                            $recallstatus = 'Waiting';
                        }
                    }
                }

                # item is withdrawn, lost, damaged, not for loan, reserved or in transit
                if (   $item->{withdrawn}
                    || $item->{itemlost}
                    || $item->{damaged}
                    || $item->{notforloan}
                    || $reservestatus eq 'Waiting'
                    || $recallstatus eq 'Waiting'
                    || ($transfertwhen && $transfertwhen ne ''))
                {
                    $withdrawn_count++       if $item->{withdrawn};
                    $itemlost_count++        if $item->{itemlost};
                    $itemdamaged_count++     if $item->{damaged};
                    $item_in_transit_count++ if $transfertwhen && $transfertwhen ne '';
                    $item_onhold_count++     if $reservestatus eq 'Waiting';
                    $item_recalled_count++   if $recallstatus eq 'Waiting';
                    $item->{status} = ($item->{withdrawn}//q{}) . "-" . ($item->{itemlost}//q{}) . "-" . ($item->{damaged}//q{}) . "-" . ($item->{notforloan}//q{});

                    $other_count++;

                    my $key = $prefix . $item->{status};
                    foreach (qw(withdrawn itemlost damaged branchname itemcallnumber)) {
                        $other_items->{$key}->{$_} = $item->{$_};
                    }
                    # $transfertwhen is undef when the transfer lookup returned no row
                    $other_items->{$key}->{intransit} = ( defined $transfertwhen && $transfertwhen ne '' ) ? 1 : 0;
                    $other_items->{$key}->{recalled} = ($recallstatus) ? 1 : 0;
                    $other_items->{$key}->{onhold} = ($reservestatus) ? 1 : 0;
                    $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value and $item->{notforloan};
                    $other_items->{$key}->{count}++ if $item->{$hbranch};
                    $other_items->{$key}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $other_items->{$key}->{description} = $item->{description};
                    $other_items->{$key}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
                # item is available
                else {
                    $available_count++;
                    $available_items->{$prefix}->{count}++ if $item->{$hbranch};
                    foreach (qw(branchname itemcallnumber description)) {
                        $available_items->{$prefix}->{$_} = $item->{$_};
                    }
                    $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
            }
        }    # end of the loop over items

        # if all items are hidden, do not show the record
        if ( C4::Context->preference('OpacHiddenItemsHidesRecord') && $items_count > 0 && $hideatopac_count == $items_count) {
            next;
        }

        # Copy the grouped items into the template loops, in key order,
        # stopping once the limit derived from maxItemsinSearchResults is passed
        my ( $availableitemscount, $onloanitemscount, $otheritemscount );
        for my $key ( sort keys %$onloan_items ) {
            (++$onloanitemscount > $maxitems) and last;
            push @onloan_items_loop, $onloan_items->{$key};
        }
        for my $key ( sort keys %$other_items ) {
            (++$otheritemscount > $maxitems) and last;
            push @other_items_loop, $other_items->{$key};
        }
        for my $key ( sort keys %$available_items ) {
            (++$availableitemscount > $maxitems) and last;
            push @available_items_loop, $available_items->{$key}
        }

        # XSLT processing of some stuff
        if (!$scan) {
            $record_processor->options({
                frameworkcode => $fw,
                interface     => $search_context->{'interface'}
            });

            $record_processor->process($marcrecord);

            $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
                {
                    biblionumber => $oldbiblio->{biblionumber},
                    record       => $marcrecord,
                    xsl_syspref  => (
                        $is_opac
                        ? 'OPACXSLTResultsDisplay'
                        : 'XSLTResultsDisplay'
                    ),
                    fix_amps       => 1,
                    hidden_items   => \@hiddenitems,
                    xslt_variables => $xslt_variables
                }
            );
        }

        my $biblio_object = Koha::Biblios->find( $oldbiblio->{biblionumber} );
        $oldbiblio->{biblio_object} = $biblio_object;

        my $can_place_holds = 1;
        # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either
        if ( !$item_level_itypes ) {
            if ($itemtype && $itemtype->{notforloan}) {
                $can_place_holds = 0;
            }
        } else {
            $can_place_holds = $biblio_object->items->filter_by_for_hold()->count if $biblio_object;
        }
        $oldbiblio->{norequests} = 1 unless $can_place_holds;
        $oldbiblio->{items_count}          = $items_count;
        $oldbiblio->{available_items_loop} = \@available_items_loop;
        $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
        $oldbiblio->{other_items_loop}     = \@other_items_loop;
        $oldbiblio->{availablecount}       = $available_count;
        $oldbiblio->{availableplural}      = 1 if $available_count > 1;
        $oldbiblio->{onloancount}          = $onloan_count;
        $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
        $oldbiblio->{othercount}           = $other_count;
        $oldbiblio->{otherplural}          = 1 if $other_count > 1;
        $oldbiblio->{withdrawncount}       = $withdrawn_count;
        $oldbiblio->{itemlostcount}        = $itemlost_count;
        $oldbiblio->{damagedcount}         = $itemdamaged_count;
        $oldbiblio->{intransitcount}       = $item_in_transit_count;
        $oldbiblio->{onholdcount}          = $item_onhold_count;
        $oldbiblio->{recalledcount}        = $item_recalled_count;
        $oldbiblio->{orderedcount}         = $ordered_count;
        $oldbiblio->{notforloancount}      = $notforloan_count;

        # Records without items may describe their holdings in another MARC
        # field: the syspref value is a 3-character tag followed by the
        # subfield codes to display
        if (C4::Context->preference("AlternateHoldingsField") && $items_count == 0) {
            my $fieldspec = C4::Context->preference("AlternateHoldingsField");
            my $subfields = substr $fieldspec, 3;
            my $holdingsep = C4::Context->preference("AlternateHoldingsSeparator") || ' ';
            my @alternateholdingsinfo = ();
            my @holdingsfields = $marcrecord->field(substr $fieldspec, 0, 3);
            my $alternateholdingscount = 0;

            for my $field (@holdingsfields) {
                my %holding = ( holding => '' );
                my $havesubfield = 0;
                for my $subfield ($field->subfields()) {
                    if ((index $subfields, $$subfield[0]) >= 0) {
                        $holding{'holding'} .= $holdingsep if (length $holding{'holding'} > 0);
                        $holding{'holding'} .= $$subfield[1];
                        $havesubfield++;
                    }
                }
                if ($havesubfield) {
                    push(@alternateholdingsinfo, \%holding);
                    $alternateholdingscount++;
                }
            }

            $oldbiblio->{'ALTERNATEHOLDINGS'} = \@alternateholdingsinfo;
            $oldbiblio->{'alternateholdings_count'} = $alternateholdingscount;
        }

        push( @newresults, $oldbiblio );
    }

    return @newresults;
}
2068
=head2 enabled_staff_search_views

  my %views = enabled_staff_search_views();

Returns a list of key/value pairs, suitable for assigning to a hash, holding
one flag per optional staff search results view. Each flag is the value of the
corresponding system preference:

=over 2

=item C<can_view_MARC>

value of the C<viewMARC> system preference (MARC view)

=item C<can_view_ISBD>

value of the C<viewISBD> system preference (ISBD view)

=item C<can_view_labeledMARC>

value of the C<viewLabeledMARC> system preference (Labeled MARC view)

=back

The list can be handed to the template as is:

  $template->param( C4::Search::enabled_staff_search_views );

=cut

sub enabled_staff_search_views {

    # One pair per view, in the order MARC, ISBD, Labeled MARC
    return (
        'can_view_MARC'        => C4::Context->preference('viewMARC'),
        'can_view_ISBD'        => C4::Context->preference('viewISBD'),
        'can_view_labeledMARC' => C4::Context->preference('viewLabeledMARC'),
    );
}
2101
=head2 z3950_search_args

  $arrayref = z3950_search_args($matchpoints)

This function returns an array reference that contains the search parameters to be
passed to the Z39.50 search script (z3950_search.pl). The array elements
are hash refs with two keys: C<name> holds the name of a search parameter and
C<value> holds the value of that search parameter.

The search parameter names are lccn, isbn, issn, title, author, dewey and subject;
the returned elements are always in that order.

The search parameter values are obtained from the bibliographic record whose
data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
If the record's title is a valid ISBN, it is returned as the isbn parameter and
no title parameter is returned. The hash passed in is not modified.

If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
a general purpose search argument. In this case, the returned array contains only
one entry: its name is 'isbn' if the scalar is a valid ISBN and 'title' otherwise,
and its value is $matchpoints.

If a search parameter value is undefined, it is not included in the returned
array.

The returned array reference may be passed directly to the template parameters:

  $data = Biblio::GetBiblioData($bibno);
  $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )

*OR*

  $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )

=cut

sub z3950_search_args {
    my $bibrec = shift;

    # The title (or the bare search string) may actually be an ISBN
    my $candidate = ref($bibrec) ? $bibrec->{title} : $bibrec;

    # Only ask Business::ISBN about values that exist
    my $isbn = defined $candidate ? Business::ISBN->new($candidate) : undef;
    my $is_isbn = defined $isbn && $isbn->is_valid;

    # Work on our own hash so that the caller's record is left untouched
    my %search;
    if ( ref $bibrec ) {
        %search = %{$bibrec};
        if ($is_isbn) {
            $search{isbn}  = $candidate;
            $search{title} = undef;
        }
    }
    elsif ($is_isbn) {
        %search = ( isbn => $candidate );
    }
    else {
        %search = ( title => $bibrec );
    }

    return [
        map  { +{ name => $_, value => $search{$_} } }
        grep { defined $search{$_} } qw/ lccn isbn issn title author dewey subject /
    ];
}
2171
=head2 GetDistinctValues

  my $elements = GetDistinctValues( $fieldname, $string );

Returns a reference to an array of hashrefs, each with the keys C<value> and C<cnt>.

If C<$fieldname> contains a dot it is read as C<table.column>: the distinct values
of that column are fetched from the database together with their number of
occurrences, ordered by the column. When C<$string> is given, only values
starting with C<$string> are returned.

Otherwise C<$fieldname> is used as an index name for a scan on the
biblioserver and authorityserver connections, with C<$string> as the scan
term (an empty term C<""> if not given).

=cut

sub GetDistinctValues {
    my ( $fieldname, $string ) = @_;

    if ( $fieldname =~ /\./ ) {
        my ( $table, $column ) = split /\./, $fieldname;
        my $dbh = C4::Context->dbh;

        # Table and column names cannot be bound as placeholders: quote them
        # as identifiers instead of interpolating them raw
        my $quoted_table  = $dbh->quote_identifier($table);
        my $quoted_column = $dbh->quote_identifier($column);

        my $sql = "select DISTINCT($quoted_column) as value, count(*) as cnt from $quoted_table ";
        my @bind;
        if ($string) {
            # The prefix is bound, never interpolated into the statement
            $sql .= "where $quoted_column like ? ";
            push @bind, "$string%";
        }
        $sql .= "group by value order by $quoted_column ";

        my $sth = $dbh->prepare($sql);
        $sth->execute(@bind);
        return $sth->fetchall_arrayref( {} );
    }

    $string ||= qq("");
    my @servers = qw<biblioserver authorityserver>;
    my ( @zconns, @results );
    for my $i ( 0 .. $#servers ) {
        $zconns[$i]  = C4::Context->Zconn( $servers[$i], 1 );
        $results[$i] = $zconns[$i]->scan(
            ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i] )
        );
    }

    # The big moment: asynchronously retrieve results from all servers
    my @elements;
    _ZOOM_event_loop(
        \@zconns,
        \@results,
        sub {
            # NOTE(review): $i is used as a 1-based index into @results here;
            # confirm against _ZOOM_event_loop
            my ( $i, $size ) = @_;
            for my $j ( 0 .. $size - 1 ) {
                my %hashscan;
                @hashscan{qw(value cnt)} = $results[ $i - 1 ]->display_term($j);
                push @elements, \%hashscan;
            }
        }
    );
    return \@elements;
}
2218
=head2 _ZOOM_event_loop

    _ZOOM_event_loop(\@zconns, \@results, sub {
        my ( $i, $size ) = @_;
        ....
    } );

Processes a ZOOM event loop and passes control to a closure for
processing the results, then destroys the result sets.

C<\@zconns> and C<\@results> are parallel arrays: the result set for
connection C<$zconns[$n]> is C<$results[$n]>. The closure is called with the
1-based index of the connection whose events have ended and the size of its
result set, and only when that result set exists and is not empty.

Slots of C<\@results> may be undefined (for instance when a search could not
be started); they are skipped both when dispatching and when cleaning up.

=cut

sub _ZOOM_event_loop {
    my ( $zconns, $results, $callback ) = @_;

    # ZOOM::event() hands back a 1-based connection index; 0 ends the loop.
    while ( ( my $i = ZOOM::event($zconns) ) != 0 ) {
        my $ev = $zconns->[ $i - 1 ]->last_event();
        next unless $ev == ZOOM::Event::ZEND();

        my $result = $results->[ $i - 1 ];
        next unless $result;

        my $size = $result->size();
        $callback->( $i, $size ) if $size > 0;
    }

    # Release every result set that was actually created. A missing one must
    # not abort the cleanup of the others.
    for my $result ( @{$results} ) {
        next unless defined $result;
        $result->destroy();
    }
}
2248
=head2 new_record_from_zebra

    my $record = new_record_from_zebra( $server, $raw_data );

Turns one raw record taken from a search engine result set into a
MARC::Record object, honouring the system preferences and configuration
variables involved.

With Zebra, the index mode configured for C<$server>
(C<zebra_bib_index_mode> for C<biblioserver>, C<zebra_auth_index_mode> for
anything else, both defaulting to C<dom>) decides the format: in DOM mode the
raw data is parsed as MARCXML, in any other mode (GRS-1) as USMARC. Nothing
is returned when the data cannot be parsed.

With Elasticsearch the data is returned untouched when it already is a
MARC::Record, and parsed as MARCXML otherwise; that parse is not wrapped in
an eval. In that setup this function really needs a new name.

=cut

sub new_record_from_zebra {
    my ( $server, $raw_data ) = @_;

    # Elasticsearch results need no index-mode lookup at all.
    if ( C4::Context->preference("SearchEngine") eq 'Elasticsearch' ) {
        return $raw_data if ref($raw_data) eq 'MARC::Record';
        return MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
    }

    # Bibliographic and authority servers have separate index mode settings.
    my $mode_setting =
      $server eq 'biblioserver'
      ? 'zebra_bib_index_mode'
      : 'zebra_auth_index_mode';
    my $index_mode = C4::Context->config($mode_setting) // 'dom';

    # Parsing dies on malformed data; trap that and report it as "no record".
    my $marc_record = eval {
        $index_mode eq 'dom'
          ? MARC::Record->new_from_xml( $raw_data, 'UTF-8' )
          : MARC::Record->new_from_usmarc($raw_data);
    };

    return if $@;
    return $marc_record;
}
2293
2294 END { }    # module clean-up code here (global destructor)
2295
2296 1;
2297 __END__
2298
2299 =head1 AUTHOR
2300
2301 Koha Development Team <http://koha-community.org/>
2302
2303 =cut