C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it
   6 # under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 3 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but
  11 # WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  17
  18 use Modern::Perl;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  22 use C4::Koha;      # getFacets
  23 use Koha::DateUtils;
  24 use Koha::Libraries;
  25 use Lingua::Stem;
  26 use C4::Search::PazPar2;
  27 use XML::Simple;
  28 use C4::XSLT;
  29 use C4::Reserves;    # GetReserveStatus
  30 use C4::Debug;
  31 use C4::Charset;
  32 use Koha::AuthorisedValues;
  33 use Koha::ItemTypes;
  34 use Koha::Libraries;
  35 use Koha::Patrons;
  36 use Koha::RecordProcessor;
  37 use YAML;
  38 use URI::Escape;
  39 use Business::ISBN;
  40 use MARC::Record;
  41 use MARC::Field;
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  43
  44 BEGIN {
  45     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  46 }
  47
  48 =head1 NAME
  49
  50 C4::Search - Functions for searching the Koha catalog.
  51
  52 =head1 SYNOPSIS
  53
See opac/opac-search.pl or catalogue/search.pl for examples of usage.
  55
  56 =head1 DESCRIPTION
  57
  58 This module provides searching functions for Koha's bibliographic databases
  59
  60 =head1 FUNCTIONS
  61
  62 =cut
  63
  64 @ISA    = qw(Exporter);
  65 @EXPORT = qw(
  66   &FindDuplicate
  67   &SimpleSearch
  68   &searchResults
  69   &getRecords
  70   &buildQuery
  71   &GetDistinctValues
  72   &enabled_staff_search_views
  73 );
  74
  75 # make all your functions, whether exported or not;
  76
  77 =head2 FindDuplicate
  78
  79 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  80
  81 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  82
  83 =cut
  84
  85 sub FindDuplicate {
  86     my ($record) = @_;
  87     my $dbh = C4::Context->dbh;
  88     my $result = TransformMarcToKoha( $record, '' );
  89     my $sth;
  90     my $query;
  91
  92     # search duplicate on ISBN, easy and fast..
  93     # ... normalize first
  94     if ( $result->{isbn} ) {
  95         $result->{isbn} =~ s/\(.*$//;
  96         $result->{isbn} =~ s/\s+$//;
  97         $query = "isbn:$result->{isbn}";
  98     }
  99     else {
 100
 101         my $titleindex = 'ti,ext';
 102         my $authorindex = 'au,ext';
 103         my $op = 'and';
 104
 105         $result->{title} =~ s /\\//g;
 106         $result->{title} =~ s /\"//g;
 107         $result->{title} =~ s /\(//g;
 108         $result->{title} =~ s /\)//g;
 109
 110         $query = "$titleindex:\"$result->{title}\"";
 111         if   ( $result->{author} ) {
 112             $result->{author} =~ s /\\//g;
 113             $result->{author} =~ s /\"//g;
 114             $result->{author} =~ s /\(//g;
 115             $result->{author} =~ s /\)//g;
 116
 117             $query .= " $op $authorindex:\"$result->{author}\"";
 118         }
 119     }
 120
 121     my $searcher = Koha::SearchEngine::Search->new({index => $Koha::SearchEngine::BIBLIOS_INDEX});
 122     my ( $error, $searchresults, undef ) = $searcher->simple_search_compat($query,0,50);
 123     my @results;
 124     if (!defined $error) {
 125         foreach my $possible_duplicate_record (@{$searchresults}) {
 126             my $marcrecord = new_record_from_zebra(
 127                 'biblioserver',
 128                 $possible_duplicate_record
 129             );
 130
 131             my $result = TransformMarcToKoha( $marcrecord, '' );
 132
 133             # FIXME :: why 2 $biblionumber ?
 134             if ($result) {
 135                 push @results, $result->{'biblionumber'};
 136                 push @results, $result->{'title'};
 137             }
 138         }
 139     }
 140     return @results;
 141 }
 142
 143 =head2 SimpleSearch
 144
 145 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] );
 146
 147 This function provides a simple search API on the bibliographic catalog
 148
 149 =over 2
 150
 151 =item C<input arg:>
 152
 153     * $query can be a simple keyword or a complete CCL query
 154     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 155     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 156     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 157     * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = )
 158
 159
 160 =item C<Return:>
 161
 162     Returns an array consisting of three elements
 163     * $error is undefined unless an error is detected
 164     * $results is a reference to an array of records.
 165     * $total_hits is the number of hits that would have been returned with no limit
 166
 167     If an error is returned the two other return elements are undefined. If error itself is undefined
 168     the other two elements are always defined
 169
 170 =item C<usage in the script:>
 171
 172 =back
 173
 174 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 175
 176 if (defined $error) {
 177     $template->param(query_error => $error);
 178     warn "error: ".$error;
 179     output_html_with_http_headers $input, $cookie, $template->output;
 180     exit;
 181 }
 182
 183 my $hits = @{$marcresults};
 184 my @results;
 185
 186 for my $r ( @{$marcresults} ) {
 187     my $marcrecord = MARC::File::USMARC::decode($r);
 188     my $biblio = TransformMarcToKoha($marcrecord,q{});
 189
 190     #build the iarray of hashs for the template.
 191     push @results, {
 192         title           => $biblio->{'title'},
 193         subtitle        => $biblio->{'subtitle'},
 194         biblionumber    => $biblio->{'biblionumber'},
 195         author          => $biblio->{'author'},
 196         publishercode   => $biblio->{'publishercode'},
 197         publicationyear => $biblio->{'publicationyear'},
 198         };
 199
 200 }
 201
 202 $template->param(result=>\@results);
 203
 204 =cut
 205
 206 sub SimpleSearch {
 207     my ( $query, $offset, $max_results, $servers, %options )  = @_;
 208
 209     return ( 'No query entered', undef, undef ) unless $query;
 210     # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 211     my @servers = defined ( $servers ) ? @$servers : ( 'biblioserver' );
 212     my @zoom_queries;
 213     my @tmpresults;
 214     my @zconns;
 215     my $results = [];
 216     my $total_hits = 0;
 217
 218     # Initialize & Search Zebra
 219     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 220         eval {
 221             $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 222             $query =~ s/:/=/g unless $options{skip_normalize};
 223             $zoom_queries[$i] = ZOOM::Query::CCL2RPN->new( $query, $zconns[$i]);
 224             $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 225
 226             # error handling
 227             my $error =
 228                 $zconns[$i]->errmsg() . " ("
 229               . $zconns[$i]->errcode() . ") "
 230               . $zconns[$i]->addinfo() . " "
 231               . $zconns[$i]->diagset();
 232
 233             return ( $error, undef, undef ) if $zconns[$i]->errcode();
 234         };
 235         if ($@) {
 236
 237             # caught a ZOOM::Exception
 238             my $error =
 239                 $@->message() . " ("
 240               . $@->code() . ") "
 241               . $@->addinfo() . " "
 242               . $@->diagset();
 243             warn $error." for query: $query";
 244             return ( $error, undef, undef );
 245         }
 246     }
 247
 248     _ZOOM_event_loop(
 249         \@zconns,
 250         \@tmpresults,
 251         sub {
 252             my ($i, $size) = @_;
 253             my $first_record = defined($offset) ? $offset + 1 : 1;
 254             my $hits = $tmpresults[ $i - 1 ]->size();
 255             $total_hits += $hits;
 256             my $last_record = $hits;
 257             if ( defined $max_results && $offset + $max_results < $hits ) {
 258                 $last_record = $offset + $max_results;
 259             }
 260
 261             for my $j ( $first_record .. $last_record ) {
 262                 my $record = eval {
 263                   $tmpresults[ $i - 1 ]->record( $j - 1 )->raw()
 264                   ;    # 0 indexed
 265                 };
 266                 push @{$results}, $record if defined $record;
 267             }
 268         }
 269     );
 270
 271     foreach my $zoom_query (@zoom_queries) {
 272         $zoom_query->destroy();
 273     }
 274
 275     return ( undef, $results, $total_hits );
 276 }
 277
 278 =head2 getRecords
 279
 280 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 281
 282         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 283         $results_per_page, $offset,       $branches,       $itemtypes,
 284         $query_type,       $scan,         $opac
 285     );
 286
 287 The all singing, all dancing, multi-server, asynchronous, scanning,
 288 searching, record nabbing, facet-building
 289
 290 See verbose embedded documentation.
 291
 292 =cut
 293
 294 sub getRecords {
 295     my (
 296         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 297         $results_per_page, $offset,       $branches,         $itemtypes,
 298         $query_type,       $scan,         $opac
 299     ) = @_;
 300
 301     my @servers = @$servers_ref;
 302     my @sort_by = @$sort_by_ref;
 303     $offset = 0 if $offset < 0;
 304
 305     # Initialize variables for the ZOOM connection and results object
 306     my @zconns;
 307     my @results;
 308     my $results_hashref = ();
 309
 310     # TODO simplify this structure ( { branchcode => $branchname } is enought) and remove this parameter
 311     $branches ||= { map { $_->branchcode => { branchname => $_->branchname } } Koha::Libraries->search };
 312
 313     # Initialize variables for the faceted results objects
 314     my $facets_counter = {};
 315     my $facets_info    = {};
 316     my $facets         = getFacets();
 317
 318     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 319
 320     ### LOOP THROUGH THE SERVERS
 321     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 322         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 323
 324 # perform the search, create the results objects
 325 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 326         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 327
 328         #$query_to_use = $simple_query if $scan;
 329         warn $simple_query if ( $scan and $DEBUG );
 330
 331         # Check if we've got a query_type defined, if so, use it
 332         eval {
 333             if ($query_type) {
 334                 if ($query_type =~ /^ccl/) {
 335                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 336                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 337                 } elsif ($query_type =~ /^cql/) {
 338                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CQL->new($query_to_use, $zconns[$i]));
 339                 } elsif ($query_type =~ /^pqf/) {
 340                     $results[$i] = $zconns[$i]->search(ZOOM::Query::PQF->new($query_to_use, $zconns[$i]));
 341                 } else {
 342                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 343                 }
 344             } elsif ($scan) {
 345                     $results[$i] = $zconns[$i]->scan(  ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 346             } else {
 347                     $results[$i] = $zconns[$i]->search(ZOOM::Query::CCL2RPN->new($query_to_use, $zconns[$i]));
 348             }
 349         };
 350         if ($@) {
 351             warn "WARNING: query problem with $query_to_use " . $@;
 352         }
 353
 354         # Concatenate the sort_by limits and pass them to the results object
 355         # Note: sort will override rank
 356         my $sort_by;
 357         foreach my $sort (@sort_by) {
 358             if ( $sort eq "author_az" || $sort eq "author_asc" ) {
 359                 $sort_by .= "1=1003 <i ";
 360             }
 361             elsif ( $sort eq "author_za" || $sort eq "author_dsc" ) {
 362                 $sort_by .= "1=1003 >i ";
 363             }
 364             elsif ( $sort eq "popularity_asc" ) {
 365                 $sort_by .= "1=9003 <i ";
 366             }
 367             elsif ( $sort eq "popularity_dsc" ) {
 368                 $sort_by .= "1=9003 >i ";
 369             }
 370             elsif ( $sort eq "call_number_asc" ) {
 371                 $sort_by .= "1=8007  <i ";
 372             }
 373             elsif ( $sort eq "call_number_dsc" ) {
 374                 $sort_by .= "1=8007 >i ";
 375             }
 376             elsif ( $sort eq "pubdate_asc" ) {
 377                 $sort_by .= "1=31 <i ";
 378             }
 379             elsif ( $sort eq "pubdate_dsc" ) {
 380                 $sort_by .= "1=31 >i ";
 381             }
 382             elsif ( $sort eq "acqdate_asc" ) {
 383                 $sort_by .= "1=32 <i ";
 384             }
 385             elsif ( $sort eq "acqdate_dsc" ) {
 386                 $sort_by .= "1=32 >i ";
 387             }
 388             elsif ( $sort eq "title_az" || $sort eq "title_asc" ) {
 389                 $sort_by .= "1=4 <i ";
 390             }
 391             elsif ( $sort eq "title_za" || $sort eq "title_dsc" ) {
 392                 $sort_by .= "1=4 >i ";
 393             }
 394             else {
 395                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 396             }
 397         }
 398         if ( $sort_by && !$scan && $results[$i] ) {
 399             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 400                 warn "WARNING sort $sort_by failed";
 401             }
 402         }
 403     }    # finished looping through servers
 404
 405     # The big moment: asynchronously retrieve results from all servers
 406         _ZOOM_event_loop(
 407             \@zconns,
 408             \@results,
 409             sub {
 410                 my ( $i, $size ) = @_;
 411                 my $results_hash;
 412
 413                 # loop through the results
 414                 $results_hash->{'hits'} = $size;
 415                 my $times;
 416                 if ( $offset + $results_per_page <= $size ) {
 417                     $times = $offset + $results_per_page;
 418                 }
 419                 else {
 420                     $times = $size;
 421                 }
 422
 423                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 424                     my $record;
 425
 426                     ## Check if it's an index scan
 427                     if ($scan) {
 428                         my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j);
 429
 430                  # here we create a minimal MARC record and hand it off to the
 431                  # template just like a normal result ... perhaps not ideal, but
 432                  # it works for now
 433                         my $tmprecord = MARC::Record->new();
 434                         $tmprecord->encoding('UTF-8');
 435                         my $tmptitle;
 436                         my $tmpauthor;
 437
 438                 # the minimal record in author/title (depending on MARC flavour)
 439                         if ( C4::Context->preference("marcflavour") eq
 440                             "UNIMARC" )
 441                         {
 442                             $tmptitle = MARC::Field->new(
 443                                 '200', ' ', ' ',
 444                                 a => $term,
 445                                 f => $occ
 446                             );
 447                             $tmprecord->append_fields($tmptitle);
 448                         }
 449                         else {
 450                             $tmptitle =
 451                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 452                             $tmpauthor =
 453                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 454                             $tmprecord->append_fields($tmptitle);
 455                             $tmprecord->append_fields($tmpauthor);
 456                         }
 457                         $results_hash->{'RECORDS'}[$j] =
 458                           $tmprecord->as_usmarc();
 459                     }
 460
 461                     # not an index scan
 462                     else {
 463                         $record = $results[ $i - 1 ]->record($j)->raw();
 464                         # warn "RECORD $j:".$record;
 465                         $results_hash->{'RECORDS'}[$j] = $record;
 466                     }
 467
 468                 }
 469                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 470
 471                 # Fill the facets while we're looping, but only for the
 472                 # biblioserver and not for a scan
 473                 if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) {
 474                     $facets_counter = GetFacets( $results[ $i - 1 ] );
 475                     $facets_info    = _get_facets_info( $facets );
 476                 }
 477
 478                 # BUILD FACETS
 479                 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 480                     for my $link_value (
 481                         sort { $a cmp $b } keys %$facets_counter
 482                       )
 483                     {
 484                         my @this_facets_array;
 485                         for my $one_facet (
 486                             sort {
 487                                 $facets_counter->{$link_value}
 488                                   ->{$b} <=> $facets_counter->{$link_value}
 489                                   ->{$a}
 490                             } keys %{ $facets_counter->{$link_value} }
 491                           )
 492                         {
 493 # Sanitize the link value : parenthesis, question and exclamation mark will cause errors with CCL
 494                             my $facet_link_value = $one_facet;
 495                             $facet_link_value =~ s/[()!?¡¿؟]/ /g;
 496
 497                             # fix the length that will display in the label,
 498                             my $facet_label_value = $one_facet;
 499                             my $facet_max_length  = C4::Context->preference(
 500                                 'FacetLabelTruncationLength')
 501                               || 20;
 502                             $facet_label_value =
 503                               substr( $one_facet, 0, $facet_max_length )
 504                               . "..."
 505                               if length($facet_label_value) >
 506                                   $facet_max_length;
 507
 508                         # if it's a branch, label by the name, not the code,
 509                             if ( $link_value =~ /branch/ ) {
 510                                 if (   defined $branches
 511                                     && ref($branches) eq "HASH"
 512                                     && defined $branches->{$one_facet}
 513                                     && ref( $branches->{$one_facet} ) eq
 514                                     "HASH" )
 515                                 {
 516                                     $facet_label_value =
 517                                       $branches->{$one_facet}
 518                                       ->{'branchname'};
 519                                 }
 520                                 else {
 521                                     $facet_label_value = "*";
 522                                 }
 523                             }
 524
 525                       # if it's a itemtype, label by the name, not the code,
 526                             if ( $link_value =~ /itype/ ) {
 527                                 if (   defined $itemtypes
 528                                     && ref($itemtypes) eq "HASH"
 529                                     && defined $itemtypes->{$one_facet}
 530                                     && ref( $itemtypes->{$one_facet} ) eq
 531                                     "HASH" )
 532                                 {
 533                                     $facet_label_value =
 534                                       $itemtypes->{$one_facet}
 535                                       ->{translated_description};
 536                                 }
 537                             }
 538
 539            # also, if it's a location code, use the name instead of the code
 540                             if ( $link_value =~ /location/ ) {
 541                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 542                                 my $av = Koha::AuthorisedValues->search({ category => 'LOC', authorised_value => $one_facet });
 543                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 544                             }
 545
 546                             # also, if it's a collection code, use the name instead of the code
 547                             if ( $link_value =~ /ccode/ ) {
 548                                 # TODO Retrieve all authorised values at once, instead of 1 query per entry
 549                                 my $av = Koha::AuthorisedValues->search({ category => 'CCODE', authorised_value => $one_facet });
 550                                 $facet_label_value = $av->count ? $av->next->opac_description : '';
 551                             }
 552
 553             # but we're down with the whole label being in the link's title.
 554                             push @this_facets_array,
 555                               {
 556                                 facet_count =>
 557                                   $facets_counter->{$link_value}
 558                                   ->{$one_facet},
 559                                 facet_label_value => $facet_label_value,
 560                                 facet_title_value => $one_facet,
 561                                 facet_link_value  => $facet_link_value,
 562                                 type_link_value   => $link_value,
 563                               }
 564                               if ($facet_label_value);
 565                         }
 566
 567                         push @facets_loop,
 568                           {
 569                             type_link_value => $link_value,
 570                             type_id         => $link_value . "_id",
 571                             "type_label_"
 572                               . $facets_info->{$link_value}->{'label_value'} =>
 573                               1,
 574                             facets     => \@this_facets_array,
 575                           }
 576                           unless (
 577                             (
 578                                 $facets_info->{$link_value}->{'label_value'} =~
 579                                 /Libraries/
 580                             )
 581                             and ( Koha::Libraries->search->count == 1 )
 582                           );
 583                     }
 584                 }
 585             }
 586         );
 587
 588     # This sorts the facets into alphabetical order
 589     if (@facets_loop) {
 590         foreach my $f (@facets_loop) {
 591             $f->{facets} = [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ];
 592         }
 593     }
 594
 595     return ( undef, $results_hashref, \@facets_loop );
 596 }
 597
 598 sub GetFacets {
 599
 600     my $rs = shift;
 601     my $facets;
 602
 603     my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0;
 604
 605     if ( $use_zebra_facets ) {
 606         $facets = _get_facets_from_zebra( $rs );
 607     } else {
 608         $facets = _get_facets_from_records( $rs );
 609     }
 610
 611     return $facets;
 612 }
 613
 614 sub _get_facets_from_records {
 615
 616     my $rs = shift;
 617
 618     my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20;
 619     my $facets_config  = getFacets();
 620     my $facets         = {};
 621     my $size           = $rs->size();
 622     my $jmax           = $size > $facets_maxrecs
 623                             ? $facets_maxrecs
 624                             : $size;
 625
 626     for ( my $j = 0 ; $j < $jmax ; $j++ ) {
 627
 628         my $marc_record = new_record_from_zebra (
 629                 'biblioserver',
 630                 $rs->record( $j )->raw()
 631         );
 632
 633         if ( ! defined $marc_record ) {
 634             warn "ERROR DECODING RECORD - $@: " .
 635                 $rs->record( $j )->raw();
 636             next;
 637         }
 638
 639         _get_facets_data_from_record( $marc_record, $facets_config, $facets );
 640     }
 641
 642     return $facets;
 643 }
 644
 645 =head2 _get_facets_data_from_record
 646
 647     C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter );
 648
 649 Internal function that extracts facets information from a MARC::Record object
 650 and populates $facets_counter for using in getRecords.
 651
 652 $facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured
 653 facets for Zebra).
 654
 655 =cut
 656
 657 sub _get_facets_data_from_record {
 658
 659     my ( $marc_record, $facets, $facets_counter ) = @_;
 660
 661     for my $facet (@$facets) {
 662
 663         my @used_datas = ();
 664
 665         foreach my $tag ( @{ $facet->{ tags } } ) {
 666
 667             # tag number is the first three digits
 668             my $tag_num          = substr( $tag, 0, 3 );
 669             # subfields are the remainder
 670             my $subfield_letters = substr( $tag, 3 );
 671
 672             my @fields = $marc_record->field( $tag_num );
 673             foreach my $field (@fields) {
 674                 # If $field->indicator(1) eq 'z', it means it is a 'see from'
 675                 # field introduced because of IncludeSeeFromInSearches, so skip it
 676                 next if $field->indicator(1) eq 'z';
 677
 678                 my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
 679                 $data =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 680
 681                 unless ( grep { $_ eq $data } @used_datas ) {
 682                     push @used_datas, $data;
 683                     $facets_counter->{ $facet->{ idx } }->{ $data }++;
 684                 }
 685             }
 686         }
 687     }
 688 }
 689
 690 =head2 _get_facets_from_zebra
 691
 692     my $facets = _get_facets_from_zebra( $result_set )
 693
 694 Retrieves facets for a specified result set. It loops through the facets defined
 695 in C4::Koha::getFacets and returns a hash with the following structure:
 696
 697    {  facet_idx => {
 698             facet_value => count
 699       },
 700       ...
 701    }
 702
 703 =cut
 704
 705 sub _get_facets_from_zebra {
 706
 707     my $rs = shift;
 708
 709     # save current elementSetName
 710     my $elementSetName = $rs->option( 'elementSetName' );
 711
 712     my $facets_loop = getFacets();
 713     my $facets_data  = {};
 714     # loop through defined facets and fill the facets hashref
 715     foreach my $facet ( @$facets_loop ) {
 716
 717         my $idx = $facet->{ idx };
 718         my $sep = $facet->{ sep };
 719         my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep );
 720         if ( $facet_values ) {
 721             # we've actually got a result
 722             $facets_data->{ $idx } = $facet_values;
 723         }
 724     }
 725     # set elementSetName to its previous value to avoid side effects
 726     $rs->option( elementSetName => $elementSetName );
 727
 728     return $facets_data;
 729 }
 730
 731 =head2 _get_facet_from_result_set
 732
 733     my $facet_values =
 734         C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep )
 735
 736 Internal function that extracts facet information for a specific index ($facet_idx) and
 737 returns a hash containing facet values and count:
 738
 739     {
 740         $facet_value => $count ,
 741         ...
 742     }
 743
 744 Warning: this function has the side effect of changing the elementSetName for the result
 745 set. It is a helper function for the main loop, which takes care of backing it up for
 746 restoring.
 747
 748 =cut
 749
 750 sub _get_facet_from_result_set {
 751
 752     my $facet_idx = shift;
 753     my $rs        = shift;
 754     my $sep       = shift;
 755
 756     my $internal_sep  = '<*>';
 757     my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20;
 758
 759     return if ( ! defined $facet_idx || ! defined $rs );
 760     # zebra's facet element, untokenized index
 761     my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount;
 762     # configure zebra results for retrieving the desired facet
 763     $rs->option( elementSetName => $facet_element );
 764     # get the facet record from result set
 765     my $facet = $rs->record( 0 )->raw;
 766     # if the facet has no restuls...
 767     return if !defined $facet;
 768     # TODO: benchmark DOM vs. SAX performance
 769     my $facet_dom = XML::LibXML->load_xml(
 770       string => ($facet)
 771     );
 772     my @terms = $facet_dom->getElementsByTagName('term');
 773     return if ! @terms;
 774
 775     my $facets = {};
 776     foreach my $term ( @terms ) {
 777         my $facet_value = $term->textContent;
 778         $facet_value =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
 779         $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
 780         $facets->{ $facet_value } += $term->getAttribute( 'occur' );
 781     }
 782
 783     return $facets;
 784 }
 785
 786 =head2 _get_facets_info
 787
 788     my $facets_info = C4::Search::_get_facets_info( $facets )
 789
 790 Internal function that extracts facets information and properly builds
 791 the data structure needed to render facet labels.
 792
 793 =cut
 794
 795 sub _get_facets_info {
 796
 797     my $facets = shift;
 798
 799     my $facets_info = {};
 800
 801     for my $facet ( @$facets ) {
 802         $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label };
 803     }
 804
 805     return $facets_info;
 806 }
 807
 808 sub pazGetRecords {
 809     my (
 810         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 811         $results_per_page, $offset,       $branches,       $query_type,
 812         $scan
 813     ) = @_;
 814     #NOTE: Parameter $branches is not used here !
 815
 816     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 817     $paz->init();
 818     $paz->search($simple_query);
 819     sleep 1;   # FIXME: WHY?
 820
 821     # do results
 822     my $results_hashref = {};
 823     my $stats = XMLin($paz->stat);
 824     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 825
 826     # for a grouped search result, the number of hits
 827     # is the number of groups returned; 'bib_hits' will have
 828     # the total number of bibs.
 829     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 830     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 831
 832     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 833         my $recid = $hit->{recid}->[0];
 834
 835         my $work_title = $hit->{'md-work-title'}->[0];
 836         my $work_author;
 837         if (exists $hit->{'md-work-author'}) {
 838             $work_author = $hit->{'md-work-author'}->[0];
 839         }
 840         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 841
 842         my $result_group = {};
 843         $result_group->{'group_label'} = $group_label;
 844         $result_group->{'group_merge_key'} = $recid;
 845
 846         my $count = 1;
 847         if (exists $hit->{count}) {
 848             $count = $hit->{count}->[0];
 849         }
 850         $result_group->{'group_count'} = $count;
 851
 852         for (my $i = 0; $i < $count; $i++) {
 853             # FIXME -- may need to worry about diacritics here
 854             my $rec = $paz->record($recid, $i);
 855             push @{ $result_group->{'RECORDS'} }, $rec;
 856         }
 857
 858         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 859     }
 860
 861     # pass through facets
 862     my $termlist_xml = $paz->termlist('author,subject');
 863     my $terms = XMLin($termlist_xml, forcearray => 1);
 864     my @facets_loop = ();
 865     #die Dumper($results);
 866 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 867 #        my @facets = ();
 868 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 869 #            push @facets, {
 870 #                facet_label_value => $facet->{'name'}->[0],
 871 #            };
 872 #        }
 873 #        push @facets_loop, ( {
 874 #            type_label => $list,
 875 #            facets => \@facets,
 876 #        } );
 877 #    }
 878
 879     return ( undef, $results_hashref, \@facets_loop );
 880 }
 881
 882 # TRUNCATION
 883 sub _detect_truncation {
 884     my ( $operand, $index ) = @_;
 885     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 886         @regexpr );
 887     $operand =~ s/^ //g;
 888     my @wordlist = split( /\s/, $operand );
 889     foreach my $word (@wordlist) {
 890         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 891             push @rightlefttruncated, $word;
 892         }
 893         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 894             push @lefttruncated, $word;
 895         }
 896         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 897             push @righttruncated, $word;
 898         }
 899         elsif ( index( $word, "*" ) < 0 ) {
 900             push @nontruncated, $word;
 901         }
 902         else {
 903             push @regexpr, $word;
 904         }
 905     }
 906     return (
 907         \@nontruncated,       \@righttruncated, \@lefttruncated,
 908         \@rightlefttruncated, \@regexpr
 909     );
 910 }
 911
 912 # STEMMING
 913 sub _build_stemmed_operand {
 914     my ($operand,$lang) = @_;
 915     require Lingua::Stem::Snowball ;
 916     my $stemmed_operand=q{};
 917
 918     # Stemmer needs language
 919     return $operand unless $lang;
 920
 921     # If operand contains a digit, it is almost certainly an identifier, and should
 922     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 923     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 924     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 925     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 926     return $operand if $operand =~ /\d/;
 927
 928 # FIXME: the locale should be set based on the user's language and/or search choice
 929     #warn "$lang";
 930     # Make sure we only use the first two letters from the language code
 931     $lang = lc(substr($lang, 0, 2));
 932     # The language codes for the two variants of Norwegian will now be "nb" and "nn",
 933     # none of which Lingua::Stem::Snowball can use, so we need to "translate" them
 934     if ($lang eq 'nb' || $lang eq 'nn') {
 935       $lang = 'no';
 936     }
 937     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 938                                                encoding => "UTF-8" );
 939
 940     my @words = split( / /, $operand );
 941     my @stems = $stemmer->stem(\@words);
 942     for my $stem (@stems) {
 943         $stemmed_operand .= "$stem";
 944         $stemmed_operand .= "?"
 945           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 946         $stemmed_operand .= " ";
 947     }
 948     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 949     return $stemmed_operand;
 950 }
 951
 952 # FIELD WEIGHTING
 953 sub _build_weighted_query {
 954
 955 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 956 # pretty well but could work much better if we had a smarter query parser
 957     my ( $operand, $stemmed_operand, $index ) = @_;
 958     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 959     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 960     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 961     $operand =~ s/"/ /g;    # Bug 7518: searches with quotation marks don't work
 962
 963     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 964
 965     # Keyword, or, no index specified
 966     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 967         $weighted_query .=
 968           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 969         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 970         $weighted_query .= " or Title-cover,phr,r3=\"$operand\"";    # phrase title
 971         $weighted_query .= " or ti,wrdl,r4=\"$operand\"";    # words in title
 972           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 973           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 974         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 975           if $fuzzy_enabled;    # add fuzzy, word list
 976         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 977           if ( $stemming and $stemmed_operand )
 978           ;                     # add stemming, right truncation
 979         $weighted_query .= " or wrdl,r9=\"$operand\"";
 980
 981         # embedded sorting: 0 a-z; 1 z-a
 982         # $weighted_query .= ") or (sort1,aut=1";
 983     }
 984
 985     # Barcode searches should skip this process
 986     elsif ( $index eq 'bc' ) {
 987         $weighted_query .= "bc=\"$operand\"";
 988     }
 989
 990     # Authority-number searches should skip this process
 991     elsif ( $index eq 'an' ) {
 992         $weighted_query .= "an=\"$operand\"";
 993     }
 994
 995     # If the index is numeric, don't autoquote it.
 996     elsif ( $index =~ /,st-numeric$/ ) {
 997         $weighted_query .= " $index=$operand";
 998     }
 999
1000     # If the index already has more than one qualifier, wrap the operand
1001     # in quotes and pass it back (assumption is that the user knows what they
1002     # are doing and won't appreciate us mucking up their query
1003     elsif ( $index =~ ',' ) {
1004         $weighted_query .= " $index=\"$operand\"";
1005     }
1006
1007     #TODO: build better cases based on specific search indexes
1008     else {
1009         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
1010           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
1011         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
1012         $weighted_query .= " or $index,wrdl,r6=\"$operand\"";    # word list index
1013         $weighted_query .= " or $index,wrdl,fuzzy,r8=\"$operand\""
1014           if $fuzzy_enabled;    # add fuzzy, word list
1015         $weighted_query .= " or $index,wrdl,rt,r9=\"$stemmed_operand\""
1016           if ( $stemming and $stemmed_operand );    # add stemming, right truncation
1017     }
1018
1019     $weighted_query .= "))";                       # close rank specification
1020     return $weighted_query;
1021 }
1022
=head2 getIndexes

    my $indexes = C4::Search::getIndexes();

Returns a reference to a newly built array holding the names of the available
search indexes: first the bibliographic indexes, then the item indexes.

Names are returned exactly as they must be written in a query, with no
surrounding whitespace. C<bc> and C<Local-classification> appear in both
groups and are therefore returned twice.

=cut

sub getIndexes {

    # biblio indexes
    my @biblio_indexes = qw(
      ab
      Abstract
      acqdate
      allrecords
      an
      Any
      at
      arl
      arp
      au
      aub
      aud
      audience
      auo
      aut
      Author
      Author-in-order
      Author-personal-bibliography
      Authority-Number
      authtype
      bc
      Bib-level
      biblionumber
      bio
      biography
      callnum
      cfn
      Chronological-subdivision
      cn-bib-source
      cn-bib-sort
      cn-class
      cn-item
      cn-prefix
      cn-suffix
      cpn
      Code-institution
      Conference-name
      Conference-name-heading
      Conference-name-see
      Conference-name-seealso
      Content-type
      Control-number
      copydate
      Corporate-name
      Corporate-name-heading
      Corporate-name-see
      Corporate-name-seealso
      Country-publication
      ctype
      curriculum
      date-entered-on-file
      Date-of-acquisition
      Date-of-publication
      Date-time-last-modified
      Dewey-classification
      Dissertation-information
      diss
      dtlm
      EAN
      extent
      fic
      fiction
      Form-subdivision
      format
      Geographic-subdivision
      he
      Heading
      Heading-use-main-or-added-entry
      Heading-use-series-added-entry
      Heading-use-subject-added-entry
      Host-item
      id-other
      ident
      Identifier-standard
      Illustration-code
      Index-term-genre
      Index-term-uncontrolled
      Interest-age-level
      Interest-grade-level
      ISBN
      isbn
      ISSN
      issn
      itemtype
      kw
      Koha-Auth-Number
      l-format
      language
      language-original
      lc-card
      LC-card-number
      lcn
      lex
      lexile-number
      llength
      ln
      ln-audio
      ln-subtitle
      Local-classification
      Local-number
      Match-heading
      Match-heading-see-from
      Material-type
      mc-itemtype
      mc-rtype
      mus
      name
      Music-number
      Name-geographic
      Name-geographic-heading
      Name-geographic-see
      Name-geographic-seealso
      nb
      Note
      notes
      ns
      nt
      Other-control-number
      pb
      Personal-name
      Personal-name-heading
      Personal-name-see
      Personal-name-seealso
      pl
      Place-publication
      pn
      popularity
      pubdate
      Publisher
      Provider
      pv
      Reading-grade-level
      Record-control-number
      rcn
      Record-type
      rtype
      se
      See
      See-also
      sn
      Stock-number
      su
      Subject
      Subject-heading-thesaurus
      Subject-name-personal
      Subject-subdivision
      Summary
      Suppress
      su-geo
      su-na
      su-to
      su-ut
      ut
      Term-genre-form
      Term-genre-form-heading
      Term-genre-form-see
      Term-genre-form-seealso
      ti
      Title
      Title-cover
      Title-series
      Title-uniform
      Title-uniform-heading
      Title-uniform-see
      Title-uniform-seealso
      totalissues
      yr
    );

    # items indexes
    my @item_indexes = qw(
      acqsource
      barcode
      bc
      branch
      ccode
      classification-source
      cn-sort
      coded-location-qualifier
      copynumber
      damaged
      datelastborrowed
      datelastseen
      holdingbranch
      homebranch
      issues
      item
      itemnumber
      itype
      Local-classification
      location
      lost
      materials-specified
      mc-ccode
      mc-itype
      mc-loc
      notforloan
      Number-local-acquisition
      onloan
      price
      renewals
      replacementprice
      replacementpricedate
      reserves
      restricted
      stack
      stocknumber
      inv
      uri
      withdrawn
    );

    return [ @biblio_indexes, @item_indexes ];
}
1246
1247 =head2 buildQuery
1248
1249 ( $error, $query,
1250 $simple_query, $query_cgi,
1251 $query_desc, $limit,
1252 $limit_cgi, $limit_desc,
1253 $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
1254
1255 Build queries and limits in CCL, CGI, Human,
1256 handle truncation, stemming, field weighting, fuzziness, etc.
1257
1258 See verbose embedded documentation.
1259
1260
1261 =cut
1262
1263 sub buildQuery {
1264     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;
1265     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
1266
1267     my $query_desc;
1268
1269     # dereference
1270     my @operators = $operators ? @$operators : ();
1271     my @indexes   = $indexes   ? @$indexes   : ();
1272     my @operands  = $operands  ? @$operands  : ();
1273     my @limits    = $limits    ? @$limits    : ();
1274     my @sort_by   = $sort_by   ? @$sort_by   : ();
1275
1276     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
1277     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
1278     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
1279     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
1280
1281     my $query        = $operands[0];
1282     my $simple_query = $operands[0];
1283
1284     # initialize the variables we're passing back
1285     my $query_cgi;
1286     my $query_type;
1287
1288     my $limit;
1289     my $limit_cgi;
1290     my $limit_desc;
1291
1292     my $cclq       = 0;
1293     my $cclindexes = getIndexes();
1294     if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
1295         while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) {
1296             my $dx = lc($1);
1297             $cclq = grep { lc($_) eq $dx } @$cclindexes;
1298         }
1299         $query = "ccl=$query" if $cclq;
1300     }
1301
1302 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1303 # DIAGNOSTIC ONLY!!
1304     if ( $query =~ /^ccl=/ ) {
1305         my $q=$';
1306         # This is needed otherwise ccl= and &limit won't work together, and
1307         # this happens when selecting a subject on the opac-detail page
1308         @limits = grep {!/^$/} @limits;
1309         my $original_q = $q; # without available part
1310         unless ( grep { $_ eq 'available' } @limits ) {
1311             $q =~ s| and \( \(allrecords,AlwaysMatches=''\) and \(not-onloan-count,st-numeric >= 1\) and \(lost,st-numeric=0\) \)||;
1312             $original_q = $q;
1313         }
1314         if ( @limits ) {
1315             if ( grep { $_ eq 'available' } @limits ) {
1316                 $q .= q| and ( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )|;
1317                 @limits = grep {!/^available$/} @limits;
1318             }
1319             $q .= ' and '.join(' and ', @limits) if @limits;
1320         }
1321         return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $original_q, '', '', '', 'ccl' );
1322     }
1323     if ( $query =~ /^cql=/ ) {
1324         return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' );
1325     }
1326     if ( $query =~ /^pqf=/ ) {
1327         $query_desc = $';
1328         $query_cgi = "q=pqf=".uri_escape_utf8($');
1329         return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' );
1330     }
1331
1332     # pass nested queries directly
1333     # FIXME: need better handling of some of these variables in this case
1334     # Nested queries aren't handled well and this implementation is flawed and causes users to be
1335     # unable to search for anything containing () commenting out, will be rewritten for 3.4.0
1336 #    if ( $query =~ /(\(|\))/ ) {
1337 #        return (
1338 #            undef,              $query, $simple_query, $query_cgi,
1339 #            $query,             $limit, $limit_cgi,    $limit_desc,
1340 #            'ccl'
1341 #        );
1342 #    }
1343
1344 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1345 # query operands and indexes and add stemming, truncation, field weighting, etc.
1346 # Once we do so, we'll end up with a value in $query, just like if we had an
1347 # incoming $query from the user
1348     else {
1349         $query = ""
1350           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1351         my $previous_operand
1352           ;    # a flag used to keep track if there was a previous query
1353                # if there was, we can apply the current operator
1354                # for every operand
1355         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1356
1357             # COMBINE OPERANDS, INDEXES AND OPERATORS
1358             if ( ($operands[$i] // '') ne '' ) {
1359                 $operands[$i]=~s/^\s+//;
1360
1361               # A flag to determine whether or not to add the index to the query
1362                 my $indexes_set;
1363
1364 # If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
1365                 if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
1366                     $weight_fields    = 0;
1367                     $stemming         = 0;
1368                 } else {
1369                     $operands[$i] =~ s/\?/{?}/g; # need to escape question marks
1370                 }
1371                 my $operand = $operands[$i];
1372                 my $index   = $indexes[$i] || 'kw';
1373
1374                 # Add index-specific attributes
1375
1376                 #Afaik, this 'yr' condition will only ever be met in the staff interface advanced search
1377                 #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query,
1378                 #which is processed higher up in this sub. Other than that, year searches are typically
1379                 #handled as limits which are not processed here either.
1380
1381                 # Search ranges: Date of Publication, st-numeric
1382                 if ( $index =~ /(yr|st-numeric)/ ) {
1383                     #weight_fields/relevance search causes errors with date ranges
1384                     #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
1385                     #In the case of YYYY-YYYY, it will return no results
1386                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1387                 }
1388
1389                 # Date of Acquisition
1390                 elsif ( $index =~ /acqdate/ ) {
1391                     #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format
1392                     #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the
1393                       #top of the results just because they have lots of item records matching that date.
1394                     #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
1395                       #irrelevant here
1396                     $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
1397                 }
1398                 # ISBN,ISSN,Standard Number, don't need special treatment
1399                 elsif ( $index eq 'nb' || $index eq 'ns' || $index eq 'hi' ) {
1400                     (
1401                         $stemming,      $auto_truncation,
1402                         $weight_fields, $fuzzy_enabled
1403                     ) = ( 0, 0, 0, 0 );
1404
1405                     if ( $index eq 'nb' ) {
1406                         if ( C4::Context->preference("SearchWithISBNVariations") ) {
1407                             my @isbns = C4::Koha::GetVariationsOfISBN( $operand );
1408                             $operands[$i] = $operand =  '(nb=' . join(' OR nb=', @isbns) . ')';
1409                             $indexes[$i] = $index = 'kw';
1410                         }
1411                     }
1412                 }
1413
1414                 # Set default structure attribute (word list)
1415                 my $struct_attr = q{};
1416                 unless ( $indexes_set || $index =~ /,(st-|phr|ext|wrdl)/ || $index =~ /^(nb|ns)$/ ) {
1417                     $struct_attr = ",wrdl";
1418                 }
1419
1420                 # Some helpful index variants
1421                 my $index_plus       = $index . $struct_attr . ':';
1422                 my $index_plus_comma = $index . $struct_attr . ',';
1423
1424                 if ($auto_truncation){
1425                         unless ( $index =~ /,(st-|phr|ext)/ ) {
1426                                                 #FIXME only valid with LTR scripts
1427                                                 $operand=join(" ",map{
1428                                                                                         (index($_,"*")>0?"$_":"$_*")
1429                                                                                          }split (/\s+/,$operand));
1430                                                 warn $operand if $DEBUG;
1431                                         }
1432                                 }
1433
1434                 # Detect Truncation
1435                 my $truncated_operand;
1436                 my( $nontruncated, $righttruncated, $lefttruncated,
1437                     $rightlefttruncated, $regexpr
1438                 ) = _detect_truncation( $operand, $index );
1439                 warn
1440 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1441                   if $DEBUG;
1442
1443                 # Apply Truncation
1444                 if (
1445                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1446                     scalar(@$rightlefttruncated) > 0 )
1447                 {
1448
1449                # Don't field weight or add the index to the query, we do it here
1450                     $indexes_set = 1;
1451                     undef $weight_fields;
1452                     my $previous_truncation_operand;
1453                     if (scalar @$nontruncated) {
1454                         $truncated_operand .= "$index_plus @$nontruncated ";
1455                         $previous_truncation_operand = 1;
1456                     }
1457                     if (scalar @$righttruncated) {
1458                         $truncated_operand .= "and " if $previous_truncation_operand;
1459                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1460                         $previous_truncation_operand = 1;
1461                     }
1462                     if (scalar @$lefttruncated) {
1463                         $truncated_operand .= "and " if $previous_truncation_operand;
1464                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1465                         $previous_truncation_operand = 1;
1466                     }
1467                     if (scalar @$rightlefttruncated) {
1468                         $truncated_operand .= "and " if $previous_truncation_operand;
1469                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1470                         $previous_truncation_operand = 1;
1471                     }
1472                 }
1473                 $operand = $truncated_operand if $truncated_operand;
1474                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1475
1476                 # Handle Stemming
1477                 my $stemmed_operand;
1478                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1479                                                                                 if $stemming;
1480
1481                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1482
1483                 # Handle Field Weighting
1484                 my $weighted_operand;
1485                 if ($weight_fields) {
1486                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1487                     $operand = $weighted_operand;
1488                     $indexes_set = 1;
1489                 }
1490
1491                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1492
1493                 #Use relevance ranking when not using a weighted query (which adds relevance ranking of its own)
1494
1495                 #N.B. Truncation is mutually exclusive with Weighted Queries,
1496                 #so even if QueryWeightFields is turned on, QueryAutoTruncate will turn it off, thus
1497                 #the need for this relevance wrapper.
1498                 $operand = "(rk=($operand))" unless $weight_fields;
1499
1500                 ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({
1501                     query => $query,
1502                     query_cgi => $query_cgi,
1503                     query_desc => $query_desc,
1504                     operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '',
1505                     parsed_operand => $operand,
1506                     original_operand => $operands[$i] // '',
1507                     index => $index,
1508                     index_plus => $index_plus,
1509                     indexes_set => $indexes_set,
1510                     previous_operand => $previous_operand,
1511                 });
1512
1513             }    #/if $operands
1514         }    # /for
1515     }
1516     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1517
1518     # add limits
1519     my %group_OR_limits;
1520     my $availability_limit;
1521     foreach my $this_limit (@limits) {
1522         next unless $this_limit;
1523         if ( $this_limit =~ /available/ ) {
1524 #
1525 ## 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1526 ## In English:
1527 ## all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1528             $availability_limit .=
1529 "( (allrecords,AlwaysMatches='') and (not-onloan-count,st-numeric >= 1) and (lost,st-numeric=0) )";
1530             $limit_cgi  .= "&limit=available";
1531             $limit_desc .= "";
1532         }
1533
1534         # group_OR_limits, prefixed by mc-
1535         # OR every member of the group
1536         elsif ( $this_limit =~ /mc/ ) {
1537             my ($k,$v) = split(/:/, $this_limit,2);
1538             if ( $k !~ /mc-i(tem)?type/ ) {
1539                 # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes
1540                 $this_limit =~ tr/"//d;
1541                 $this_limit = $k.':"'.$v.'"';
1542             }
1543
1544             $group_OR_limits{$k} .= " or " if $group_OR_limits{$k};
1545             $limit_desc      .= " or " if $group_OR_limits{$k};
1546             $group_OR_limits{$k} .= "$this_limit";
1547             $limit_cgi       .= "&limit=" . uri_escape_utf8($this_limit);
1548             $limit_desc      .= " $this_limit";
1549         }
1550
1551         # Regular old limits
1552         else {
1553             $limit .= " and " if $limit || $query;
1554             $limit      .= "$this_limit";
1555             $limit_cgi  .= "&limit=" . uri_escape_utf8($this_limit);
1556             if ($this_limit =~ /^branch:(.+)/) {
1557                 my $branchcode = $1;
1558                 my $library = Koha::Libraries->find( $branchcode );
1559                 if (defined $library) {
1560                     $limit_desc .= " branch:" . $library->branchname;
1561                 } else {
1562                     $limit_desc .= " $this_limit";
1563                 }
1564             } else {
1565                 $limit_desc .= " $this_limit";
1566             }
1567         }
1568     }
1569     foreach my $k (keys (%group_OR_limits)) {
1570         $limit .= " and " if ( $query || $limit );
1571         $limit .= "($group_OR_limits{$k})";
1572     }
1573     if ($availability_limit) {
1574         $limit .= " and " if ( $query || $limit );
1575         $limit .= "($availability_limit)";
1576     }
1577
1578     # Normalize the query and limit strings
1579     # This is flawed , means we can't search anything with : in it
1580     # if user wants to do ccl or cql, start the query with that
1581 #    $query =~ s/:/=/g;
    #NOTE: We use several different regexps here as you can't have variable-length lookbehind assertions
1583     $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g;
1584     $query =~ s/(?<=(wrdl)):/=/g;
1585     $query =~ s/(?<=(trn|phr)):/=/g;
1586     $query =~ s/(?<=(st-numeric)):/=/g;
1587     $query =~ s/(?<=(st-year)):/=/g;
1588     $query =~ s/(?<=(st-date-normalized)):/=/g;
1589
1590     # Removing warnings for later substitutions
1591     $query      //= q{};
1592     $query_desc //= q{};
1593     $query_cgi  //= q{};
1594     $limit      //= q{};
1595     $limit_desc //= q{};
1596     $limit =~ s/:/=/g;
1597     for ( $query, $query_desc, $limit, $limit_desc ) {
1598         s/  +/ /g;    # remove extra spaces
1599         s/^ //g;     # remove any beginning spaces
1600         s/ $//g;     # remove any ending spaces
1601         s/==/=/g;    # remove double == from query
1602     }
1603     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1604
1605     for ($query_cgi,$simple_query) {
1606         s/"//g;
1607     }
1608     # append the limit to the query
1609     $query .= " " . $limit;
1610
1611     # Warnings if DEBUG
1612     if ($DEBUG) {
1613         warn "QUERY:" . $query;
1614         warn "QUERY CGI:" . $query_cgi;
1615         warn "QUERY DESC:" . $query_desc;
1616         warn "LIMIT:" . $limit;
1617         warn "LIMIT CGI:" . $limit_cgi;
1618         warn "LIMIT DESC:" . $limit_desc;
1619         warn "---------\nLeave buildQuery\n---------";
1620     }
1621
1622     return (
1623         undef,              $query, $simple_query, $query_cgi,
1624         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1625         $query_type
1626     );
1627 }
1628
=head2 _build_initial_query

  ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params);

  Build a section of the initial query containing indexes, operators, and operands.

C<$initial_query_params> is a hashref. The following keys are read:

=over 4

=item C<query>, C<query_cgi>, C<query_desc>

The strings built so far; the new section is appended to each of them.

=item C<operator>

Operator used to join this operand to what precedes it. Only used when
C<previous_operand> is true; defaults to C<and> when empty.

=item C<parsed_operand>, C<original_operand>

The operand as it goes into the query, and as it is shown in the CGI string
and the description.

=item C<index>, C<index_plus>

The index for the CGI string, and the prefix put in front of the operand
in the query and description.

=item C<indexes_set>

When true, C<parsed_operand> is used as is, without the C<index_plus> prefix.

=item C<previous_operand>

True when an operand has already been added to the query.

=back

The hashref is updated in place and the four updated values are returned;
C<previous_operand> is always true on return.

=cut

sub _build_initial_query {
    my ($params) = @_;

    # Any of these may be missing; normalise them once so that none of the
    # concatenations below raises an "uninitialized value" warning.
    my $index_plus       = $params->{index_plus}       // q{};
    my $parsed_operand   = $params->{parsed_operand}   // q{};
    my $original_operand = $params->{original_operand} // q{};

    # A joining operator is only emitted when something already precedes
    # this operand: the supplied operator if any, otherwise the default 'and'.
    my $operator = q{};
    if ( $params->{previous_operand} ) {
        $operator = $params->{operator} ? " $params->{operator} " : ' and ';
    }

    #NOTE: indexes_set is typically set when doing truncation or field weighting
    my $operand = $params->{indexes_set}
        ? $parsed_operand
        : $index_plus . $parsed_operand;

    #e.g. "kw,wrdl:test"
    #e.g. " and kw,wrdl:test"
    $params->{query} .= $operator . $operand;

    $params->{query_cgi} .= '&op=' . uri_escape_utf8($operator)            if $operator;
    $params->{query_cgi} .= '&idx=' . uri_escape_utf8( $params->{index} )  if $params->{index};
    $params->{query_cgi} .= '&q=' . uri_escape_utf8($original_operand)     if $original_operand;

    #e.g. " and kw,wrdl: test"
    $params->{query_desc} .= $operator . $index_plus . q{ } . $original_operand;

    # If there was no previous operand, this one now counts as one
    $params->{previous_operand} ||= 1;

    return ( $params->{query}, $params->{query_cgi}, $params->{query_desc}, $params->{previous_operand} );
}
1664
=head2 searchResults

  my @search_results = searchResults($search_context, $searchdesc, $hits,
                                     $results_per_page, $offset, $scan,
                                     $marcresults, $xslt_variables);

Format results in a form suitable for passing to the template

=over 4

=item C<$search_context>

Hashref. Its C<interface> key is forced to C<opac> unless it is C<intranet>;
its C<category> key is handed to C<C4::Items::GetHiddenItemnumbers> as the
borrower category.

=item C<$searchdesc>

Not used by this subroutine.

=item C<$hits>, C<$results_per_page>, C<$offset>

Records from position C<$offset> up to, but not including,
C<$offset + $results_per_page> are processed, never going past C<$hits>.

=item C<$scan>

When true, each raw record is parsed with C<< MARC::Record->new_from_usmarc >>
and neither the framework lookup nor the XSLT rendering is done. Otherwise
records are decoded with C<new_record_from_zebra>.

=item C<$marcresults>

Arrayref of raw records, indexed by result position.

=item C<$xslt_variables>

Passed through unchanged to C<XSLTParse4Display>.

=back

Returns a list of hashrefs, one per record. Records that cannot be decoded
(non-scan searches) and records whose items are all hidden in the OPAC are
left out.

=cut

# IMO this subroutine is pretty messy still -- it's responsible for
# building the HTML output for the template
sub searchResults {
    my ( $search_context, $searchdesc, $hits, $results_per_page, $offset, $scan, $marcresults, $xslt_variables ) = @_;

    # NOTE(review): $dbh is not used below; the call is kept in case the
    # connection is expected to be initialised here -- confirm before removing.
    my $dbh = C4::Context->dbh;
    my @newresults;

    require C4::Items;

    $search_context->{'interface'} = 'opac' if !$search_context->{'interface'} || $search_context->{'interface'} ne 'intranet';
    my ($is_opac, $hidelostitems);
    if ($search_context->{'interface'} eq 'opac') {
        $hidelostitems = C4::Context->preference('hidelostitems');
        $is_opac       = 1;
    }

    my $record_processor = Koha::RecordProcessor->new({
        filters => 'ViewPolicy'
    });

    #Build branchnames hash
    my %branches = map { $_->branchcode => $_->branchname } Koha::Libraries->search({}, { order_by => 'branchname' });

# FIXME - We build an authorised values hash here, using the default framework
# though it is possible to have different authvals for different fws.

    my $shelflocations =
      { map { $_->{authorised_value} => $_->{lib} } Koha::AuthorisedValues->get_descriptions_by_koha_field( { frameworkcode => '', kohafield => 'items.location' } ) };

    # get notforloan authorised value list (see $shelflocations  FIXME)
    my $av = Koha::MarcSubfieldStructures->search({ frameworkcode => '', kohafield => 'items.notforloan', authorised_value => [ -and => {'!=' => undef }, {'!=' => ''}] });
    my $notforloan_authorised_value = $av->count ? $av->next->authorised_value : undef;

    #Get itemtype hash
    my $itemtypes = Koha::ItemTypes->search_with_localization;
    my %itemtypes = map { $_->{itemtype} => $_ } @{ $itemtypes->unblessed };

    #search item field code
    my ($itemtag, undef) = GetMarcFromKohaField( "items.itemnumber" );

    ## find column names of items related to MARC
    my %subfieldstosearch;
    my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns;
    for my $column ( @columns ) {
        my ( $tagfield, $tagsubfield ) =
          GetMarcFromKohaField( "items." . $column );
        if ( defined $tagsubfield ) {
            $subfieldstosearch{$column} = $tagsubfield;
        }
    }

    # handle which records to actually retrieve
    my $times;
    if ( $hits && $offset + $results_per_page <= $hits ) {
        $times = $offset + $results_per_page;
    }
    else {
        # No (or fewer) hits than a full page: stop at the last hit.
        # An undefined $hits means there is nothing to loop over.
        $times = $hits // 0;
    }

    my $marcflavour = C4::Context->preference("marcflavour");
    # We get the biblionumber position in MARC
    my ($bibliotag,$bibliosubf)=GetMarcFromKohaField( 'biblio.biblionumber' );

    # set stuff for XSLT processing here once, not later again for every record we retrieved
    my $xslfile;
    my $xslsyspref;
    if( $is_opac ){
        $xslsyspref = "OPACXSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref );
    } else {
        $xslsyspref = "XSLTResultsDisplay";
        $xslfile = C4::Context->preference( $xslsyspref ) || "default";
    }
    my $lang   = $xslfile ? C4::Languages::getlanguage()  : undef;
    my $sysxml = $xslfile ? C4::XSLT::get_xslt_sysprefs() : undef;

    my $userenv = C4::Context->userenv;
    my $logged_in_user
        = ( defined $userenv and $userenv->{number} )
        ? Koha::Patrons->find( $userenv->{number} )
        : undef;
    my $patron_category_hide_lost_items = ($logged_in_user) ? $logged_in_user->category->hidelostitems : 0;

    # System preferences that are the same for every record and item:
    # read them once here instead of once per record or per item.
    my $hbranch     = C4::Context->preference('StaffSearchResultsDisplayBranch') // q{};
    my $otherbranch = $hbranch eq 'homebranch' ? 'holdingbranch' : 'homebranch';
    my $item_level_itypes      = C4::Context->preference("item-level_itypes");
    my $status_check_limit     = C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck');
    my $allow_holds_on_damaged = C4::Context->preference('AllowHoldsOnDamagedItems');
    my $easy_analytics         = C4::Context->preference('EasyAnalyticalRecords');
    my $maxitems_pref          = C4::Context->preference('maxItemsinSearchResults');
    my $maxitems               = $maxitems_pref ? $maxitems_pref - 1 : 1;

    # Host-item links live in 461 for UNIMARC and in 773 otherwise
    my $analyticsfield = ( $marcflavour // q{} ) eq 'UNIMARC' ? '461' : '773';

    # loop through all of the records we've retrieved
    for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {

        my $marcrecord;
        if ($scan) {
            # For Scan searches we built USMARC data
            $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i]);
        } else {
            # Normal search, render from Zebra's output
            $marcrecord = new_record_from_zebra(
                'biblioserver',
                $marcresults->[$i]
            );

            if ( ! defined $marcrecord ) {
                warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i];
                next;
            }
        }

        # Control fields (tag < 10) have no subfields, so the biblionumber
        # is read from the field data instead.
        my $fw = $scan
             ? undef
             : $bibliotag < 10
               ? GetFrameworkCode($marcrecord->field($bibliotag)->data)
               : GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));

        SetUTF8Flag($marcrecord);
        my $oldbiblio = TransformMarcToKoha( $marcrecord, $fw );
        $oldbiblio->{result_number} = $i + 1;

        $oldbiblio->{normalized_upc}  = GetNormalizedUPC(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_ean}  = GetNormalizedEAN(       $marcrecord,$marcflavour);
        $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
        $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
        $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});

        # edition information, if any
        $oldbiblio->{edition} = $oldbiblio->{editionstatement};

        my $itemtype = $oldbiblio->{itemtype} ? $itemtypes{$oldbiblio->{itemtype}} : undef;
        # add imageurl to itemtype if there is one
        $oldbiblio->{imageurl} = $itemtype ? getitemtypeimagelocation( $search_context->{'interface'}, $itemtype->{imageurl} ) : q{};
        # Build summary if there is one (the summary is defined in the itemtypes table)
        $oldbiblio->{description} = $itemtype ? $itemtype->{translated_description} : q{};

        # FIXME: this is only used in the deprecated non-XLST opac results
        if ( !$xslfile && $is_opac && $itemtype && $itemtype->{summary} ) {
            my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};

            my $newsummary = q{};
            foreach my $line ( "$summary\n" =~ /(.*)\n/g ){
                # For every [tagsubfield] placeholder on this line, count how
                # many values the record holds. /g is needed so that every
                # placeholder on the line is seen, not just the first one.
                my $tags = {};
                foreach my $tag ( $line =~ /\[(\d{3}[\w|\d])\]/g ) {
                    my ( $field_tag, $subfield_code ) = $tag =~ /(.{3})(.)/;
                    if ( my $tagged_field = $marcrecord->field($field_tag) ) {
                        my @abc = $tagged_field->subfield($subfield_code);
                        $tags->{$tag} = $#abc + 1 ;
                    }
                }

                # We catch how many times to repeat this line
                my $max = 0;
                foreach my $tag (keys(%$tags)){
                    $max = $tags->{$tag} if($tags->{$tag} > $max);
                }

                # we replace, and repeat each line
                for my $repetition ( 0 .. $max - 1 ) {
                    my $newline = $line;

                    foreach my $tag ( $newline =~ /\[(\d{3}[\w|\d])\]/g ) {
                        my ( $field_tag, $subfield_code ) = $tag =~ /(.{3})(.)/;

                        if ( my $tagged_field = $marcrecord->field($field_tag) ) {
                            my @repl = $tagged_field->subfield($subfield_code);
                            # A placeholder with fewer values than $max
                            # is replaced by nothing on the extra lines.
                            my $subfieldvalue = $repl[$repetition] // q{};
                            # \Q..\E: the placeholder may contain '|',
                            # which must not act as an alternation.
                            $newline =~ s/\[\Q$tag\E\]/$subfieldvalue/g;
                        }
                    }
                    $newsummary .= "$newline\n";
                }
            }

            # Drop whatever placeholders are left, then turn newlines into HTML breaks
            $newsummary =~ s/\[(.*?)]//g;
            $newsummary =~ s/\n/<br\/>/g;
            $oldbiblio->{summary} = $newsummary;
        }

        # Pull out the items fields
        my @fields = $marcrecord->field($itemtag);

        # adding linked items that belong to host records
        if ( $easy_analytics ) {
            foreach my $hostfield ( $marcrecord->field($analyticsfield)) {
                my $hostbiblionumber = $hostfield->subfield("0");
                my $linkeditemnumber = $hostfield->subfield("9");
                if( $hostbiblionumber ) {
                    my $linkeditemmarc = C4::Items::GetMarcItem( $hostbiblionumber, $linkeditemnumber );
                    if ($linkeditemmarc) {
                        my $linkeditemfield = $linkeditemmarc->field($itemtag);
                        if ($linkeditemfield) {
                            push( @fields, $linkeditemfield );
                        }
                    }
                }
            }
        }

        # Setting item statuses for display
        my @available_items_loop;
        my @onloan_items_loop;
        my @other_items_loop;

        my $available_items;
        my $onloan_items;
        my $other_items;

        my $ordered_count         = 0;
        my $available_count       = 0;
        my $onloan_count          = 0;
        my $longoverdue_count     = 0;
        my $other_count           = 0;
        my $withdrawn_count       = 0;
        my $itemlost_count        = 0;
        my $hideatopac_count      = 0;
        my $itembinding_count     = 0;
        my $itemdamaged_count     = 0;
        my $item_in_transit_count = 0;
        my $can_place_holds       = 0;
        my $item_onhold_count     = 0;
        my $notforloan_count      = 0;
        my $items_count           = scalar(@fields);
        my @hiddenitems; # hidden itemnumbers based on OpacHiddenItems syspref

        # loop through every item
        foreach my $field (@fields) {
            my $item;

            # populate the items hash
            foreach my $code ( keys %subfieldstosearch ) {
                $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
            }
            $item->{description} = $itemtypes{ $item->{itype} }{translated_description} if $item->{itype};

            # OPAC hidden items
            if ($is_opac) {
                # hidden because lost
                if ($hidelostitems && $item->{itemlost}) {
                    $hideatopac_count++;
                    next;
                }
                # hidden based on OpacHiddenItems syspref
                my @hi = C4::Items::GetHiddenItemnumbers({ items=> [ $item ], borcat => $search_context->{category} });
                if (scalar @hi) {
                    push @hiddenitems, @hi;
                    $hideatopac_count++;
                    next;
                }
            }

            # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
            if ($item->{$hbranch}) {
                $item->{'branchname'} = $branches{$item->{$hbranch}};
            }
            elsif ($item->{$otherbranch}) {     # Last resort
                $item->{'branchname'} = $branches{$item->{$otherbranch}};
            }

            my $prefix =
                ( $item->{$hbranch} ? $item->{$hbranch} . '--' : q{} )
              . ( $item->{location} ? $item->{location} : q{} )
              . ( $item->{itype}    ? $item->{itype}    : q{} )
              . ( $item->{itemcallnumber} ? $item->{itemcallnumber} : q{} );
# For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
            if ( $item->{onloan}
                and $logged_in_user
                and !( $patron_category_hide_lost_items and $item->{itemlost} ) )
            {
                $onloan_count++;
                my $key = $prefix . $item->{onloan} . ( $item->{barcode} // q{} );
                $onloan_items->{$key}->{due_date} = $item->{onloan};
                $onloan_items->{$key}->{count}++ if $item->{$hbranch};
                $onloan_items->{$key}->{branchname}     = $item->{branchname};
                $onloan_items->{$key}->{location}       = $shelflocations->{ $item->{location} } if $item->{location};
                $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
                $onloan_items->{$key}->{description}    = $item->{description};
                $onloan_items->{$key}->{imageurl} =
                  getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype} // q{} }->{imageurl} );

                # if something's checked out and lost, mark it as 'long overdue'
                if ( $item->{itemlost} ) {
                    $onloan_items->{$key}->{longoverdue}++;
                    $longoverdue_count++;
                }
                else {    # can place holds as long as item isn't lost
                    $can_place_holds = 1;
                }
            }

            # items not on loan, but still unavailable ( lost, withdrawn, damaged )
            else {

                # Item type code that decides "not for loan": the item's own
                # type or the record's, depending on item-level_itypes.
                my $effective_itype = $item_level_itypes ? $item->{itype} : $oldbiblio->{itemtype};
                $item->{notforloan} = 1 if !$item->{notforloan} &&
                    $effective_itype && $itemtypes{ $effective_itype }->{notforloan};

                # item is on order (negative) or not for loan (positive);
                # an empty or missing value counts as 0
                my $notforloan = $item->{notforloan} || 0;
                if ( $notforloan < 0 ) {
                    $ordered_count++;
                } elsif ( $notforloan > 0 ) {
                    $notforloan_count++;
                }

                # is item in transit?
                my $transfertwhen = '';

                # is item on the reserve shelf?
                my $reservestatus = '';

                unless ($item->{withdrawn}
                        || $item->{itemlost}
                        || $item->{damaged}
                        || $item->{notforloan}
                        || ( $status_check_limit && $items_count > $status_check_limit ) ) {

                    # A couple heuristics to limit how many times
                    # we query the database for item transfer information, sacrificing
                    # accuracy in some cases for speed;
                    #
                    # 1. don't query if item has one of the other statuses
                    # 2. don't check transit status if the bib has more items
                    #    than MaxSearchResultsItemsPerRecordStatusCheck
                    #
                    # FIXME: to avoid having the query the database like this, and to make
                    #        the in transit status count as unavailable for search limiting,
                    #        should map transit status to record indexed in Zebra.
                    #
                    # Only the first returned value (the transfer date) is used here;
                    # no transfer leaves it undefined, which is normalised to ''.
                    ($transfertwhen) = C4::Circulation::GetTransfers($item->{itemnumber});
                    $transfertwhen //= q{};
                    $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} ) // q{};
                }

                # item is withdrawn, lost, damaged, not for loan, reserved or in transit
                if (   $item->{withdrawn}
                    || $item->{itemlost}
                    || $item->{damaged}
                    || $item->{notforloan}
                    || $reservestatus eq 'Waiting'
                    || ($transfertwhen && $transfertwhen ne ''))
                {
                    $withdrawn_count++       if $item->{withdrawn};
                    $itemlost_count++        if $item->{itemlost};
                    $itemdamaged_count++     if $item->{damaged};
                    $item_in_transit_count++ if $transfertwhen && $transfertwhen ne '';
                    $item_onhold_count++     if $reservestatus eq 'Waiting';
                    $item->{status} = ($item->{withdrawn}//q{}) . "-" . ($item->{itemlost}//q{}) . "-" . ($item->{damaged}//q{}) . "-" . ($item->{notforloan}//q{});

                    # can place a hold on a item if
                    # not lost nor withdrawn
                    # not damaged unless AllowHoldsOnDamagedItems is true
                    # item is either for loan or on order (notforloan < 0)
                    $can_place_holds = 1
                      if (
                           !$item->{itemlost}
                        && !$item->{withdrawn}
                        && ( !$item->{damaged} || $allow_holds_on_damaged )
                        && ( !$item->{notforloan} || $item->{notforloan} < 0 )
                      );

                    $other_count++;

                    my $key = $prefix . $item->{status};
                    foreach my $attribute (qw(withdrawn itemlost damaged branchname itemcallnumber)) {
                        $other_items->{$key}->{$attribute} = $item->{$attribute};
                    }
                    $other_items->{$key}->{intransit} = ( $transfertwhen ne '' ) ? 1 : 0;
                    $other_items->{$key}->{onhold} = ($reservestatus) ? 1 : 0;
                    $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value and $item->{notforloan};
                    $other_items->{$key}->{count}++ if $item->{$hbranch};
                    $other_items->{$key}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $other_items->{$key}->{description} = $item->{description};
                    $other_items->{$key}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
                # item is available
                else {
                    $can_place_holds = 1;
                    $available_count++;
                    $available_items->{$prefix}->{count}++ if $item->{$hbranch};
                    foreach my $attribute (qw(branchname itemcallnumber description)) {
                        $available_items->{$prefix}->{$attribute} = $item->{$attribute};
                    }
                    $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} } if $item->{location};
                    $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( $search_context->{'interface'}, $itemtypes{ $item->{itype}//q{} }->{imageurl} );
                }
            }
        }    # /foreach item

        # if all items are hidden, do not show the record
        if ($items_count > 0 && $hideatopac_count == $items_count) {
            next;
        }

        # Keep at most $maxitems groups of each kind, in key order
        my ( $availableitemscount, $onloanitemscount, $otheritemscount );
        for my $key ( sort keys %$onloan_items ) {
            (++$onloanitemscount > $maxitems) and last;
            push @onloan_items_loop, $onloan_items->{$key};
        }
        for my $key ( sort keys %$other_items ) {
            (++$otheritemscount > $maxitems) and last;
            push @other_items_loop, $other_items->{$key};
        }
        for my $key ( sort keys %$available_items ) {
            (++$availableitemscount > $maxitems) and last;
            push @available_items_loop, $available_items->{$key}
        }

        # XSLT processing of some stuff
        # we fetched the sysprefs already before the loop through all retrieved record!
        if (!$scan && $xslfile) {
            $record_processor->options({
                frameworkcode => $fw,
                interface     => $search_context->{'interface'}
            });

            $record_processor->process($marcrecord);
            $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $xslsyspref, 1, \@hiddenitems, $sysxml, $xslfile, $lang, $xslt_variables);
        }

        # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either
        if (!$item_level_itypes) {
            if ($itemtype && $itemtype->{notforloan}) {
                $can_place_holds = 0;
            }
        }
        $oldbiblio->{norequests} = 1 unless $can_place_holds;
        $oldbiblio->{items_count}          = $items_count;
        $oldbiblio->{available_items_loop} = \@available_items_loop;
        $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
        $oldbiblio->{other_items_loop}     = \@other_items_loop;
        $oldbiblio->{availablecount}       = $available_count;
        $oldbiblio->{availableplural}      = 1 if $available_count > 1;
        $oldbiblio->{onloancount}          = $onloan_count;
        $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
        $oldbiblio->{othercount}           = $other_count;
        $oldbiblio->{otherplural}          = 1 if $other_count > 1;
        $oldbiblio->{withdrawncount}       = $withdrawn_count;
        $oldbiblio->{itemlostcount}        = $itemlost_count;
        $oldbiblio->{damagedcount}         = $itemdamaged_count;
        $oldbiblio->{intransitcount}       = $item_in_transit_count;
        $oldbiblio->{onholdcount}          = $item_onhold_count;
        $oldbiblio->{orderedcount}         = $ordered_count;
        $oldbiblio->{notforloancount}      = $notforloan_count;

        # Records without items may show holdings taken from another field:
        # the first three characters of the syspref are the tag, the rest
        # are the subfield codes to join together.
        if (C4::Context->preference("AlternateHoldingsField") && $items_count == 0) {
            my $fieldspec = C4::Context->preference("AlternateHoldingsField");
            my $subfields = substr $fieldspec, 3;
            my $holdingsep = C4::Context->preference("AlternateHoldingsSeparator") || ' ';
            my @alternateholdingsinfo = ();
            my @holdingsfields = $marcrecord->field(substr $fieldspec, 0, 3);
            my $alternateholdingscount = 0;

            for my $field (@holdingsfields) {
                my %holding = ( holding => '' );
                my $havesubfield = 0;
                for my $subfield ($field->subfields()) {
                    if ((index $subfields, $$subfield[0]) >= 0) {
                        $holding{'holding'} .= $holdingsep if (length $holding{'holding'} > 0);
                        $holding{'holding'} .= $$subfield[1];
                        $havesubfield++;
                    }
                }
                if ($havesubfield) {
                    push(@alternateholdingsinfo, \%holding);
                    $alternateholdingscount++;
                }
            }

            $oldbiblio->{'ALTERNATEHOLDINGS'} = \@alternateholdingsinfo;
            $oldbiblio->{'alternateholdings_count'} = $alternateholdingscount;
        }

        $oldbiblio->{biblio_object} = Koha::Biblios->find( $oldbiblio->{biblionumber} );

        push( @newresults, $oldbiblio );
    }

    return @newresults;
}
2161
=head2 enabled_staff_search_views

  my %views = enabled_staff_search_views();

Tells which staff search results views are switched on. The returned hash
holds three flags, each one read from a system preference:

=over 2

=item C<can_view_MARC>

true only if the MARC view is enabled (C<viewMARC>)

=item C<can_view_ISBD>

true only if the ISBD view is enabled (C<viewISBD>)

=item C<can_view_labeledMARC>

true only if the Labeled MARC view is enabled (C<viewLabeledMARC>)

=back

The hash can be handed straight to the template:

  $template->param( C4::Search::enabled_staff_search_views );

=cut

sub enabled_staff_search_views {

    # Shorthand for looking up one system preference
    my $syspref = sub { C4::Context->preference(shift) };

    return (
        can_view_MARC        => $syspref->('viewMARC'),           # staff search allows the MARC view
        can_view_ISBD        => $syspref->('viewISBD'),           # staff search allows the ISBD view
        can_view_labeledMARC => $syspref->('viewLabeledMARC'),    # staff search allows the Labeled MARC view
    );
}
2194
2195 =head2 z3950_search_args
2196
2197 $arrayref = z3950_search_args($matchpoints)
2198
This function returns an array reference that contains the search parameters to be
passed to the Z39.50 search script (z3950_search.pl). The array elements
are hash refs whose keys are name and value, and whose values are the
name of a search parameter and the value of that search parameter.
2204
2205 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2206
2207 The search parameter values are obtained from the bibliographic record whose
2208 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2209
If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
a general purpose search argument. In this case, the returned array contains at most
one entry: its name is 'isbn' if the scalar is a valid ISBN and 'title' otherwise,
and its value is $matchpoints.
2213
2214 If a search parameter value is undefined or empty, it is not included in the returned
2215 array.
2216
2217 The returned array reference may be passed directly to the template parameters.
2218
2219 =over 2
2220
2221 =item C<Output arg:>
2222
2223     * $array containing hash refs as described above
2224
2225 =item C<usage in the script:>
2226
2227 =back
2228
2229 $data = Biblio::GetBiblioData($bibno);
2230 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2231
2232 *OR*
2233
2234 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2235
2236 =cut
2237
# Build the list of search parameters for the Z39.50 search script.
#
#   $bibrec - either a hash reference of bibliographic data (keys such as
#             title, author, isbn, ...) or a plain scalar holding a general
#             purpose search term.
#
# Returns a reference to an array of { name => ..., value => ... } hash
# references, one per search field that has a non-empty value. A title (or
# scalar search term) that is a valid ISBN is sent as the 'isbn' parameter
# instead of 'title'.
sub z3950_search_args {
    my $bibrec = shift;

    # Work on a private copy so the caller's record is never modified.
    my %search_for = ref $bibrec ? %{$bibrec} : ( title => $bibrec );

    # Only ask Business::ISBN about a real string; an undefined or empty
    # title cannot be an ISBN.
    my $candidate = $search_for{title};
    if ( defined $candidate && length $candidate ) {
        my $isbn = Business::ISBN->new($candidate);
        if ( defined $isbn && $isbn->is_valid ) {
            $search_for{isbn} = $candidate;
            delete $search_for{title};
        }
    }

    my @search_args;
    for my $field (qw/ lccn isbn issn title author dewey subject /) {
        my $value = $search_for{$field};

        # Skip undefined and empty values, but keep a literal "0".
        next unless defined $value && length $value;
        push @search_args, { name => $field, value => $value };
    }
    return \@search_args;
}
2264
2265 =head2 GetDistinctValues($field);
2266
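C<$field> is either a C<table.column> name, in which case the distinct values of
that database column are counted, or any other string, in which case it is used as
the qualifier of a CCL scan query run against the biblioserver and authorityserver
connections. An optional second argument is used as the prefix the column values
must start with, or as the term of the scan query.

Returns a reference to an array of hash references with the keys C<value> and C<cnt>.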
2268
2269 =cut
2270
# Return the distinct values of a field together with their number of
# occurrences, as a reference to an array of { value => ..., cnt => ... }
# hash references.
#
#   $fieldname - "table.column" to count the values of a database column;
#                any other string is used as the qualifier of a CCL scan
#                query sent to the biblioserver and authorityserver
#                connections.
#   $string    - optional; prefix the column values must start with, or the
#                term of the scan query.
sub GetDistinctValues {
    my ( $fieldname, $string ) = @_;

    if ( $fieldname =~ /\./ ) {
        my ( $table, $column ) = split /\./, $fieldname;

        # Identifiers cannot be bound as placeholders, so refuse anything
        # that is not a plain name before interpolating it into the SQL.
        for my $identifier ( $table, $column ) {
            next if defined $identifier && $identifier =~ /\A[A-Za-z0-9_]+\z/;
            warn "GetDistinctValues: invalid field name '$fieldname'";
            return [];
        }

        my $dbh   = C4::Context->dbh;
        my $query = "select DISTINCT($column) as value, count(*) as cnt from $table ";
        my @bind_values;
        if ($string) {

            # The search prefix is data, not SQL: bind it.
            $query .= "where $column like ? ";
            push @bind_values, "$string%";
        }
        $query .= "group by value order by $column ";
        warn $query if $DEBUG;

        my $sth = $dbh->prepare($query);
        $sth->execute(@bind_values);
        my $elements = $sth->fetchall_arrayref( {} );
        return $elements;
    }

    # No table given: scan the index on both servers.
    $string ||= qq("");
    my @servers = qw<biblioserver authorityserver>;
    my ( @zconns, @results );
    for my $i ( 0 .. $#servers ) {
        $zconns[$i]  = C4::Context->Zconn( $servers[$i], 1 );
        $results[$i] = $zconns[$i]->scan(
            ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i] ) );
    }

    # Collect the scan terms of every server as its results arrive.
    my @elements;
    _ZOOM_event_loop(
        \@zconns,
        \@results,
        sub {
            # $i is 1-based, hence the "- 1" when indexing @results.
            my ( $i, $size ) = @_;
            for my $j ( 0 .. $size - 1 ) {
                my %hashscan;
                @hashscan{qw(value cnt)} = $results[ $i - 1 ]->display_term($j);
                push @elements, \%hashscan;
            }
        }
    );
    return \@elements;
}
2312
2313 =head2 _ZOOM_event_loop
2314
2315     _ZOOM_event_loop(\@zconns, \@results, sub {
2316         my ( $i, $size ) = @_;
2317         ....
2318     } );
2319
2320 Processes a ZOOM event loop and passes control to a closure for
2321 processing the results, and destroying the resultsets.
2322
2323 =cut
2324
# Drive the ZOOM event loop for a set of connections, hand every non-empty
# result set to a callback, and destroy the result sets afterwards.
#
#   $zconns   - array reference of ZOOM connections
#   $results  - array reference of result sets, parallel to $zconns; slots
#               may be undefined and are then ignored
#   $callback - code reference called as $callback->($i, $size), where $i is
#               the 1-based position of the connection whose result set is
#               ready and $size the number of entries it holds
sub _ZOOM_event_loop {
    my ( $zconns, $results, $callback ) = @_;

    # ZOOM::event yields the 1-based index of the connection that produced
    # an event; the loop ends when it yields 0.
    while ( ( my $i = ZOOM::event($zconns) ) != 0 ) {
        my $ev = $zconns->[ $i - 1 ]->last_event();
        next unless $ev == ZOOM::Event::ZEND();

        my $result = $results->[ $i - 1 ] or next;
        my $size = $result->size();
        $callback->( $i, $size ) if $size > 0;
    }

    # Missing result sets were tolerated above, so tolerate them here too
    # instead of dying on an undefined slot.
    foreach my $result ( @{$results} ) {
        next unless defined $result;
        $result->destroy();
    }
    return;
}
2342
2343 =head2 new_record_from_zebra
2344
2345 Given raw data from a searchengine result set, return a MARC::Record object
2346
2347 This helper function is needed to take into account all the involved
2348 system preferences and configuration variables to properly create the
2349 MARC::Record object.
2350
2351 If we are using GRS-1, then the raw data we get from Zebra should be USMARC
2352 data. If we are using DOM, then it has to be MARCXML.
2353
2354 If we are using elasticsearch, it'll already be a MARC::Record and this
2355 function needs a new name.
2356
2357 =cut
2358
# Turn the raw data of a search engine hit into a MARC::Record.
#
#   $server   - 'biblioserver' selects the bibliographic index mode; any
#               other value selects the authority index mode
#   $raw_data - the record as delivered by the search engine
#
# Returns the MARC::Record, or nothing when the Zebra data cannot be parsed.
sub new_record_from_zebra {
    my ( $server, $raw_data ) = @_;

    if ( C4::Context->preference("SearchEngine") eq 'Elasticsearch' ) {

        # Elasticsearch may already hand over a record object; anything
        # else is treated as MARCXML.
        return $raw_data if ref($raw_data) eq 'MARC::Record';
        return MARC::Record->new_from_xml( $raw_data, 'UTF-8' );
    }

    # Each Zebra server has its own indexing mode setting; 'dom' is used
    # when the setting is absent.
    my $mode_setting =
      $server eq 'biblioserver'
      ? 'zebra_bib_index_mode'
      : 'zebra_auth_index_mode';
    my $index_mode = C4::Context->config($mode_setting) // 'dom';

    # DOM indexing delivers MARCXML, every other mode is parsed as USMARC.
    my $marc_record = eval {
        $index_mode eq 'dom'
          ? MARC::Record->new_from_xml( $raw_data, 'UTF-8' )
          : MARC::Record->new_from_usmarc($raw_data);
    };

    return if $@;
    return $marc_record;
}
2387
2388 END { }    # module clean-up code here (global destructor)
2389
2390 1;
2391 __END__
2392
2393 =head1 AUTHOR
2394
2395 Koha Development Team <http://koha-community.org/>
2396
2397 =cut