C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30
  31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  32
  33 # set the version for version checking
  34 BEGIN {
  35     $VERSION = 3.01;
  36     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Search - Functions for searching the Koha catalog.
  42
  43 =head1 SYNOPSIS
  44
  45 See opac/opac-search.pl or catalogue/search.pl for example of usage
  46
  47 =head1 DESCRIPTION
  48
  49 This module provides searching functions for Koha's bibliographic databases
  50
  51 =head1 FUNCTIONS
  52
  53 =cut
  54
  55 @ISA    = qw(Exporter);
  56 @EXPORT = qw(
  57   &findseealso
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64   &ModBiblios
  65 );
  66
  67 # make all your functions, whether exported or not;
  68
  69 =head2 findseealso($dbh,$fields);
  70
  71 C<$dbh> is a link to the DB handler.
  72
  73 use C4::Context;
  74 my $dbh =C4::Context->dbh;
  75
  76 C<$fields> is a reference to the fields array
  77
  78 This function modifies the @$fields array and adds related fields to search on.
  79
  80 FIXME: this function is probably deprecated in Koha 3
  81
  82 =cut
  83
  84 sub findseealso {
  85     my ( $dbh, $fields ) = @_;
  86     my $tagslib = GetMarcStructure(1);
  87     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  88         my ($tag)      = substr( @$fields[$i], 1, 3 );
  89         my ($subfield) = substr( @$fields[$i], 4, 1 );
  90         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  91           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  92     }
  93 }
  94
  95 =head2 FindDuplicate
  96
  97 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  98
  99 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
 100
 101 =cut
 102
 103 sub FindDuplicate {
 104     my ($record) = @_;
 105     my $dbh = C4::Context->dbh;
 106     my $result = TransformMarcToKoha( $dbh, $record, '' );
 107     my $sth;
 108     my $query;
 109     my $search;
 110     my $type;
 111     my ( $biblionumber, $title );
 112
 113     # search duplicate on ISBN, easy and fast..
 114     # ... normalize first
 115     if ( $result->{isbn} ) {
 116         $result->{isbn} =~ s/\(.*$//;
 117         $result->{isbn} =~ s/\s+$//;
 118         $query = "isbn=$result->{isbn}";
 119     }
 120     else {
 121         $result->{title} =~ s /\\//g;
 122         $result->{title} =~ s /\"//g;
 123         $result->{title} =~ s /\(//g;
 124         $result->{title} =~ s /\)//g;
 125
 126         # FIXME: instead of removing operators, could just do
 127         # quotes around the value
 128         $result->{title} =~ s/(and|or|not)//g;
 129         $query = "ti,ext=$result->{title}";
 130         $query .= " and itemtype=$result->{itemtype}"
 131           if ( $result->{itemtype} );
 132         if   ( $result->{author} ) {
 133             $result->{author} =~ s /\\//g;
 134             $result->{author} =~ s /\"//g;
 135             $result->{author} =~ s /\(//g;
 136             $result->{author} =~ s /\)//g;
 137
 138             # remove valid operators
 139             $result->{author} =~ s/(and|or|not)//g;
 140             $query .= " and au,ext=$result->{author}";
 141         }
 142     }
 143
 144     # FIXME: add error handling
 145     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 146     my @results;
 147     foreach my $possible_duplicate_record (@$searchresults) {
 148         my $marcrecord =
 149           MARC::Record->new_from_usmarc($possible_duplicate_record);
 150         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 151
 152         # FIXME :: why 2 $biblionumber ?
 153         if ($result) {
 154             push @results, $result->{'biblionumber'};
 155             push @results, $result->{'title'};
 156         }
 157     }
 158     return @results;
 159 }
 160
 161 =head2 SimpleSearch
 162
 163 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 164
 165 This function provides a simple search API on the bibliographic catalog
 166
 167 =over 2
 168
 169 =item C<input arg:>
 170
 171     * $query can be a simple keyword or a complete CCL query
 172     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 173     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 174     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 175
 176
 177 =item C<Output:>
 178
 179     * $error is a empty unless an error is detected
 180     * \@results is an array of records.
 181     * $total_hits is the number of hits that would have been returned with no limit
 182
 183 =item C<usage in the script:>
 184
 185 =back
 186
 187 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 188
 189 if (defined $error) {
 190     $template->param(query_error => $error);
 191     warn "error: ".$error;
 192     output_html_with_http_headers $input, $cookie, $template->output;
 193     exit;
 194 }
 195
 196 my $hits = scalar @$marcresults;
 197 my @results;
 198
 199 for my $i (0..$hits) {
 200     my %resultsloop;
 201     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 202     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 203
 204     #build the hash for the template.
 205     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 206     $resultsloop{title}           = $biblio->{'title'};
 207     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 208     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 209     $resultsloop{author}          = $biblio->{'author'};
 210     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 211     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 212
 213     push @results, \%resultsloop;
 214 }
 215
 216 $template->param(result=>\@results);
 217
 218 =cut
 219
 220 sub SimpleSearch {
 221     my ( $query, $offset, $max_results, $servers )  = @_;
 222
 223     if ( C4::Context->preference('NoZebra') ) {
 224         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 225         my $search_result =
 226           (      $result->{hits}
 227               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 228         return ( undef, $search_result, scalar($search_result) );
 229     }
 230     else {
 231         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 232         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 233         my @results;
 234         my @zoom_queries;
 235         my @tmpresults;
 236         my @zconns;
 237         my $total_hits;
 238         return ( "No query entered", undef, undef ) unless $query;
 239
 240         # Initialize & Search Zebra
 241         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 242             eval {
 243                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 244                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 245                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 246
 247                 # error handling
 248                 my $error =
 249                     $zconns[$i]->errmsg() . " ("
 250                   . $zconns[$i]->errcode() . ") "
 251                   . $zconns[$i]->addinfo() . " "
 252                   . $zconns[$i]->diagset();
 253
 254                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 255             };
 256             if ($@) {
 257
 258                 # caught a ZOOM::Exception
 259                 my $error =
 260                     $@->message() . " ("
 261                   . $@->code() . ") "
 262                   . $@->addinfo() . " "
 263                   . $@->diagset();
 264                 warn $error;
 265                 return ( $error, undef, undef );
 266             }
 267         }
 268         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 269             my $event = $zconns[ $i - 1 ]->last_event();
 270             if ( $event == ZOOM::Event::ZEND ) {
 271
 272                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 273                 my $hits = $tmpresults[ $i - 1 ]->size();
 274                 $total_hits += $hits;
 275                 my $last_record = $hits;
 276                 if ( defined $max_results && $offset + $max_results < $hits ) {
 277                     $last_record  = $offset + $max_results;
 278                 }
 279
 280                 for my $j ( $first_record..$last_record ) {
 281                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 282                     push @results, $record;
 283                 }
 284             }
 285         }
 286
 287         foreach my $result (@tmpresults) {
 288             $result->destroy();
 289         }
 290         foreach my $zoom_query (@zoom_queries) {
 291             $zoom_query->destroy();
 292         }
 293
 294         return ( undef, \@results, $total_hits );
 295     }
 296 }
 297
 298 =head2 getRecords
 299
 300 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 301
 302         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 303         $results_per_page, $offset,       $expanded_facet, $branches,
 304         $query_type,       $scan
 305     );
 306
 307 The all singing, all dancing, multi-server, asynchronous, scanning,
 308 searching, record nabbing, facet-building
 309
 310 See verbse embedded documentation.
 311
 312 =cut
 313
 314 sub getRecords {
 315     my (
 316         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 317         $results_per_page, $offset,       $expanded_facet, $branches,
 318         $query_type,       $scan
 319     ) = @_;
 320
 321     my @servers = @$servers_ref;
 322     my @sort_by = @$sort_by_ref;
 323
 324     # Initialize variables for the ZOOM connection and results object
 325     my $zconn;
 326     my @zconns;
 327     my @results;
 328     my $results_hashref = ();
 329
 330     # Initialize variables for the faceted results objects
 331     my $facets_counter = ();
 332     my $facets_info    = ();
 333     my $facets         = getFacets();
 334
 335     my @facets_loop
 336       ;    # stores the ref to array of hashes for template facets loop
 337
 338     ### LOOP THROUGH THE SERVERS
 339     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 340         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 341
 342 # perform the search, create the results objects
 343 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 344         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 345
 346         #$query_to_use = $simple_query if $scan;
 347         warn $simple_query if ( $scan and $DEBUG );
 348
 349         # Check if we've got a query_type defined, if so, use it
 350         eval {
 351             if ($query_type)
 352             {
 353                 if ( $query_type =~ /^ccl/ ) {
 354                     $query_to_use =~
 355                       s/\:/\=/g;    # change : to = last minute (FIXME)
 356                     $results[$i] =
 357                       $zconns[$i]->search(
 358                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 359                       );
 360                 }
 361                 elsif ( $query_type =~ /^cql/ ) {
 362                     $results[$i] =
 363                       $zconns[$i]->search(
 364                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 365                 }
 366                 elsif ( $query_type =~ /^pqf/ ) {
 367                     $results[$i] =
 368                       $zconns[$i]->search(
 369                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 370                 }
 371             }
 372             else {
 373                 if ($scan) {
 374                     $results[$i] =
 375                       $zconns[$i]->scan(
 376                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 377                       );
 378                 }
 379                 else {
 380                     $results[$i] =
 381                       $zconns[$i]->search(
 382                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 383                       );
 384                 }
 385             }
 386         };
 387         if ($@) {
 388             warn "WARNING: query problem with $query_to_use " . $@;
 389         }
 390
 391         # Concatenate the sort_by limits and pass them to the results object
 392         # Note: sort will override rank
 393         my $sort_by;
 394         foreach my $sort (@sort_by) {
 395             if ( $sort eq "author_az" ) {
 396                 $sort_by .= "1=1003 <i ";
 397             }
 398             elsif ( $sort eq "author_za" ) {
 399                 $sort_by .= "1=1003 >i ";
 400             }
 401             elsif ( $sort eq "popularity_asc" ) {
 402                 $sort_by .= "1=9003 <i ";
 403             }
 404             elsif ( $sort eq "popularity_dsc" ) {
 405                 $sort_by .= "1=9003 >i ";
 406             }
 407             elsif ( $sort eq "call_number_asc" ) {
 408                 $sort_by .= "1=20  <i ";
 409             }
 410             elsif ( $sort eq "call_number_dsc" ) {
 411                 $sort_by .= "1=20 >i ";
 412             }
 413             elsif ( $sort eq "pubdate_asc" ) {
 414                 $sort_by .= "1=31 <i ";
 415             }
 416             elsif ( $sort eq "pubdate_dsc" ) {
 417                 $sort_by .= "1=31 >i ";
 418             }
 419             elsif ( $sort eq "acqdate_asc" ) {
 420                 $sort_by .= "1=32 <i ";
 421             }
 422             elsif ( $sort eq "acqdate_dsc" ) {
 423                 $sort_by .= "1=32 >i ";
 424             }
 425             elsif ( $sort eq "title_az" ) {
 426                 $sort_by .= "1=4 <i ";
 427             }
 428             elsif ( $sort eq "title_za" ) {
 429                 $sort_by .= "1=4 >i ";
 430             }
 431         }
 432         if ($sort_by) {
 433             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 434                 warn "WARNING sort $sort_by failed";
 435             }
 436         }
 437     }    # finished looping through servers
 438
 439     # The big moment: asynchronously retrieve results from all servers
 440     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 441         my $ev = $zconns[ $i - 1 ]->last_event();
 442         if ( $ev == ZOOM::Event::ZEND ) {
 443             next unless $results[ $i - 1 ];
 444             my $size = $results[ $i - 1 ]->size();
 445             if ( $size > 0 ) {
 446                 my $results_hash;
 447
 448                 # loop through the results
 449                 $results_hash->{'hits'} = $size;
 450                 my $times;
 451                 if ( $offset + $results_per_page <= $size ) {
 452                     $times = $offset + $results_per_page;
 453                 }
 454                 else {
 455                     $times = $size;
 456                 }
 457                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 458                     my $records_hash;
 459                     my $record;
 460                     my $facet_record;
 461
 462                     ## Check if it's an index scan
 463                     if ($scan) {
 464                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 465
 466                  # here we create a minimal MARC record and hand it off to the
 467                  # template just like a normal result ... perhaps not ideal, but
 468                  # it works for now
 469                         my $tmprecord = MARC::Record->new();
 470                         $tmprecord->encoding('UTF-8');
 471                         my $tmptitle;
 472                         my $tmpauthor;
 473
 474                 # the minimal record in author/title (depending on MARC flavour)
 475                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 476                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 477                             $tmprecord->append_fields($tmptitle);
 478                         } else {
 479                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 480                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 481                             $tmprecord->append_fields($tmptitle);
 482                             $tmprecord->append_fields($tmpauthor);
 483                         }
 484                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 485                     }
 486
 487                     # not an index scan
 488                     else {
 489                         $record = $results[ $i - 1 ]->record($j)->raw();
 490
 491                         # warn "RECORD $j:".$record;
 492                         $results_hash->{'RECORDS'}[$j] = $record;
 493
 494             # Fill the facets while we're looping, but only for the biblioserver
 495                         $facet_record = MARC::Record->new_from_usmarc($record)
 496                           if $servers[ $i - 1 ] =~ /biblioserver/;
 497
 498                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 499                         if ($facet_record) {
 500                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 501
 502                                 if ( $facets->[$k] ) {
 503                                     my @fields;
 504                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 505                                     {
 506                                         push @fields,
 507                                           $facet_record->field($tag);
 508                                     }
 509                                     for my $field (@fields) {
 510                                         my @subfields = $field->subfields();
 511                                         for my $subfield (@subfields) {
 512                                             my ( $code, $data ) = @$subfield;
 513                                             if ( $code eq
 514                                                 $facets->[$k]->{'subfield'} )
 515                                             {
 516                                                 $facets_counter->{ $facets->[$k]
 517                                                       ->{'link_value'} }
 518                                                   ->{$data}++;
 519                                             }
 520                                         }
 521                                     }
 522                                     $facets_info->{ $facets->[$k]
 523                                           ->{'link_value'} }->{'label_value'} =
 524                                       $facets->[$k]->{'label_value'};
 525                                     $facets_info->{ $facets->[$k]
 526                                           ->{'link_value'} }->{'expanded'} =
 527                                       $facets->[$k]->{'expanded'};
 528                                 }
 529                             }
 530                         }
 531                     }
 532                 }
 533                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 534             }
 535
 536             # warn "connection ", $i-1, ": $size hits";
 537             # warn $results[$i-1]->record(0)->render() if $size > 0;
 538
 539             # BUILD FACETS
 540             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 541                 for my $link_value (
 542                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 543                     keys %$facets_counter )
 544                 {
 545                     my $expandable;
 546                     my $number_of_facets;
 547                     my @this_facets_array;
 548                     for my $one_facet (
 549                         sort {
 550                             $facets_counter->{$link_value}
 551                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 552                         } keys %{ $facets_counter->{$link_value} }
 553                       )
 554                     {
 555                         $number_of_facets++;
 556                         if (   ( $number_of_facets < 6 )
 557                             || ( $expanded_facet eq $link_value )
 558                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 559                         {
 560
 561                       # Sanitize the link value ), ( will cause errors with CCL,
 562                             my $facet_link_value = $one_facet;
 563                             $facet_link_value =~ s/(\(|\))/ /g;
 564
 565                             # fix the length that will display in the label,
 566                             my $facet_label_value = $one_facet;
 567                             $facet_label_value =
 568                               substr( $one_facet, 0, 20 ) . "..."
 569                               unless length($facet_label_value) <= 20;
 570
 571                             # if it's a branch, label by the name, not the code,
 572                             if ( $link_value =~ /branch/ ) {
 573                                 $facet_label_value =
 574                                   $branches->{$one_facet}->{'branchname'};
 575                             }
 576
 577                 # but we're down with the whole label being in the link's title.
 578                             my $facet_title_value = $one_facet;
 579
 580                             push @this_facets_array,
 581                               (
 582                                 {
 583                                     facet_count =>
 584                                       $facets_counter->{$link_value}
 585                                       ->{$one_facet},
 586                                     facet_label_value => $facet_label_value,
 587                                     facet_title_value => $facet_title_value,
 588                                     facet_link_value  => $facet_link_value,
 589                                     type_link_value   => $link_value,
 590                                 },
 591                               );
 592                         }
 593                     }
 594
 595                     # handle expanded option
 596                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 597                         $expandable = 1
 598                           if ( ( $number_of_facets > 6 )
 599                             && ( $expanded_facet ne $link_value ) );
 600                     }
 601                     push @facets_loop,
 602                       (
 603                         {
 604                             type_link_value => $link_value,
 605                             type_id         => $link_value . "_id",
 606                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 607                             facets     => \@this_facets_array,
 608                             expandable => $expandable,
 609                             expand     => $link_value,
 610                         }
 611                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 612                 }
 613             }
 614         }
 615     }
 616     return ( undef, $results_hashref, \@facets_loop );
 617 }
 618
 619 sub pazGetRecords {
 620     my (
 621         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 622         $results_per_page, $offset,       $expanded_facet, $branches,
 623         $query_type,       $scan
 624     ) = @_;
 625
 626     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 627     $paz->init();
 628     $paz->search($simple_query);
 629     sleep 1;
 630
 631     # do results
 632     my $results_hashref = {};
 633     my $stats = XMLin($paz->stat);
 634     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 635
 636     # for a grouped search result, the number of hits
 637     # is the number of groups returned; 'bib_hits' will have
 638     # the total number of bibs.
 639     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 640     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 641
 642     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 643         my $recid = $hit->{recid}->[0];
 644
 645         my $work_title = $hit->{'md-work-title'}->[0];
 646         my $work_author;
 647         if (exists $hit->{'md-work-author'}) {
 648             $work_author = $hit->{'md-work-author'}->[0];
 649         }
 650         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 651
 652         my $result_group = {};
 653         $result_group->{'group_label'} = $group_label;
 654         $result_group->{'group_merge_key'} = $recid;
 655
 656         my $count = 1;
 657         if (exists $hit->{count}) {
 658             $count = $hit->{count}->[0];
 659         }
 660         $result_group->{'group_count'} = $count;
 661
 662         for (my $i = 0; $i < $count; $i++) {
 663             # FIXME -- may need to worry about diacritics here
 664             my $rec = $paz->record($recid, $i);
 665             push @{ $result_group->{'RECORDS'} }, $rec;
 666         }
 667
 668         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 669     }
 670
 671     # pass through facets
 672     my $termlist_xml = $paz->termlist('author,subject');
 673     my $terms = XMLin($termlist_xml, forcearray => 1);
 674     my @facets_loop = ();
 675     #die Dumper($results);
 676 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 677 #        my @facets = ();
 678 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 679 #            push @facets, {
 680 #                facet_label_value => $facet->{'name'}->[0],
 681 #            };
 682 #        }
 683 #        push @facets_loop, ( {
 684 #            type_label => $list,
 685 #            facets => \@facets,
 686 #        } );
 687 #    }
 688
 689     return ( undef, $results_hashref, \@facets_loop );
 690 }
 691
 692 # STOPWORDS
 693 sub _remove_stopwords {
 694     my ( $operand, $index ) = @_;
 695     my @stopwords_removed;
 696
 697     # phrase and exact-qualified indexes shouldn't have stopwords removed
 698     if ( $index !~ m/phr|ext/ ) {
 699
 700 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 701 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 702 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 703 #       is a stopword, we'd get "çon" and wouldn't find anything...
 704         foreach ( keys %{ C4::Context->stopwords } ) {
 705             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 706             if ( $operand =~
 707                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 708             {
 709                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 710                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 711                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 712                                 $operand =~ s/$1//gi;
 713                 push @stopwords_removed, $_;
 714             }
 715         }
 716     }
 717     return ( $operand, \@stopwords_removed );
 718 }
 719
 720 # TRUNCATION
 721 sub _detect_truncation {
 722     my ( $operand, $index ) = @_;
 723     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 724         @regexpr );
 725     $operand =~ s/^ //g;
 726     my @wordlist = split( /\s/, $operand );
 727     foreach my $word (@wordlist) {
 728         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 729             push @rightlefttruncated, $word;
 730         }
 731         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 732             push @lefttruncated, $word;
 733         }
 734         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 735             push @righttruncated, $word;
 736         }
 737         elsif ( index( $word, "*" ) < 0 ) {
 738             push @nontruncated, $word;
 739         }
 740         else {
 741             push @regexpr, $word;
 742         }
 743     }
 744     return (
 745         \@nontruncated,       \@righttruncated, \@lefttruncated,
 746         \@rightlefttruncated, \@regexpr
 747     );
 748 }
 749
 750 # STEMMING
 751 sub _build_stemmed_operand {
 752     my ($operand) = @_;
 753     my $stemmed_operand;
 754
 755     # If operand contains a digit, it is almost certainly an identifier, and should
 756     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 757     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 758     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 759     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 760     return $operand if $operand =~ /\d/;
 761
 762 # FIXME: the locale should be set based on the user's language and/or search choice
 763     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 764
 765 # FIXME: these should be stored in the db so the librarian can modify the behavior
 766     $stemmer->add_exceptions(
 767         {
 768             'and' => 'and',
 769             'or'  => 'or',
 770             'not' => 'not',
 771         }
 772     );
 773     my @words = split( / /, $operand );
 774     my $stems = $stemmer->stem(@words);
 775     for my $stem (@$stems) {
 776         $stemmed_operand .= "$stem";
 777         $stemmed_operand .= "?"
 778           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 779         $stemmed_operand .= " ";
 780     }
 781     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 782     return $stemmed_operand;
 783 }
 784
 785 # FIELD WEIGHTING
 786 sub _build_weighted_query {
 787
 788 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 789 # pretty well but could work much better if we had a smarter query parser
 790     my ( $operand, $stemmed_operand, $index ) = @_;
 791     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 792     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 793     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 794
 795     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 796
 797     # Keyword, or, no index specified
 798     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 799         $weighted_query .=
 800           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 801         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 802         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 803           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 804           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 805         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 806           if $fuzzy_enabled;    # add fuzzy, word list
 807         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 808           if ( $stemming and $stemmed_operand )
 809           ;                     # add stemming, right truncation
 810         $weighted_query .= " or wrdl,r9=\"$operand\"";
 811
 812         # embedded sorting: 0 a-z; 1 z-a
 813         # $weighted_query .= ") or (sort1,aut=1";
 814     }
 815
 816     # Barcode searches should skip this process
 817     elsif ( $index eq 'bc' ) {
 818         $weighted_query .= "bc=\"$operand\"";
 819     }
 820
 821     # Authority-number searches should skip this process
 822     elsif ( $index eq 'an' ) {
 823         $weighted_query .= "an=\"$operand\"";
 824     }
 825
 826     # If the index already has more than one qualifier, wrap the operand
 827     # in quotes and pass it back (assumption is that the user knows what they
 828     # are doing and won't appreciate us mucking up their query
 829     elsif ( $index =~ ',' ) {
 830         $weighted_query .= " $index=\"$operand\"";
 831     }
 832
 833     #TODO: build better cases based on specific search indexes
 834     else {
 835         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 836           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 837         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 838         $weighted_query .=
 839           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 840     }
 841
 842     $weighted_query .= "))";                       # close rank specification
 843     return $weighted_query;
 844 }
 845
 846 =head2 buildQuery
 847
 848 ( $error, $query,
 849 $simple_query, $query_cgi,
 850 $query_desc, $limit,
 851 $limit_cgi, $limit_desc,
 852 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 853
 854 Build queries and limits in CCL, CGI, Human,
 855 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 856
 857 See verbose embedded documentation.
 858
 859
 860 =cut
 861
 862 sub buildQuery {
 863     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 864
 865     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 866
 867     # dereference
 868     my @operators = @$operators if $operators;
 869     my @indexes   = @$indexes   if $indexes;
 870     my @operands  = @$operands  if $operands;
 871     my @limits    = @$limits    if $limits;
 872     my @sort_by   = @$sort_by   if $sort_by;
 873
 874     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 875     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 876     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 877     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 878     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 879
 880     # no stemming/weight/fuzzy in NoZebra
 881     if ( C4::Context->preference("NoZebra") ) {
 882         $stemming      = 0;
 883         $weight_fields = 0;
 884         $fuzzy_enabled = 0;
 885     }
 886
 887     my $query        = $operands[0];
 888     my $simple_query = $operands[0];
 889
 890     # initialize the variables we're passing back
 891     my $query_cgi;
 892     my $query_desc;
 893     my $query_type;
 894
 895     my $limit;
 896     my $limit_cgi;
 897     my $limit_desc;
 898
 899     my $stopwords_removed;    # flag to determine if stopwords have been removed
 900
 901 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 902 # DIAGNOSTIC ONLY!!
 903     if ( $query =~ /^ccl=/ ) {
 904         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 905     }
 906     if ( $query =~ /^cql=/ ) {
 907         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 908     }
 909     if ( $query =~ /^pqf=/ ) {
 910         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 911     }
 912
 913     # pass nested queries directly
 914     # FIXME: need better handling of some of these variables in this case
 915     if ( $query =~ /(\(|\))/ ) {
 916         return (
 917             undef,              $query, $simple_query, $query_cgi,
 918             $query,             $limit, $limit_cgi,    $limit_desc,
 919             $stopwords_removed, 'ccl'
 920         );
 921     }
 922
 923 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 924 # query operands and indexes and add stemming, truncation, field weighting, etc.
 925 # Once we do so, we'll end up with a value in $query, just like if we had an
 926 # incoming $query from the user
 927     else {
 928         $query = ""
 929           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 930         my $previous_operand
 931           ;    # a flag used to keep track if there was a previous query
 932                # if there was, we can apply the current operator
 933                # for every operand
 934         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 935
 936             # COMBINE OPERANDS, INDEXES AND OPERATORS
 937             if ( $operands[$i] ) {
 938
 939               # A flag to determine whether or not to add the index to the query
 940                 my $indexes_set;
 941
 942 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 943                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 944                     $weight_fields    = 0;
 945                     $stemming         = 0;
 946                     $remove_stopwords = 0;
 947                 }
 948                 my $operand = $operands[$i];
 949                 my $index   = $indexes[$i];
 950
 951                 # Add index-specific attributes
 952                 # Date of Publication
 953                 if ( $index eq 'yr' ) {
 954                     $index .= ",st-numeric";
 955                     $indexes_set++;
 956                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 957                 }
 958
 959                 # Date of Acquisition
 960                 elsif ( $index eq 'acqdate' ) {
 961                     $index .= ",st-date-normalized";
 962                     $indexes_set++;
 963                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 964                 }
 965                 # ISBN,ISSN,Standard Number, don't need special treatment
 966                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 967                     $indexes_set++;
 968                     (
 969                         $stemming,      $auto_truncation,
 970                         $weight_fields, $fuzzy_enabled,
 971                         $remove_stopwords
 972                     ) = ( 0, 0, 0, 0, 0 );
 973
 974                 }
 975                 # Set default structure attribute (word list)
 976                 my $struct_attr;
 977                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 978                     $struct_attr = ",wrdl";
 979                 }
 980
 981                 # Some helpful index variants
 982                 my $index_plus       = $index . $struct_attr . ":" if $index;
 983                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 984
 985                 # Remove Stopwords
 986                 if ($remove_stopwords) {
 987                     ( $operand, $stopwords_removed ) =
 988                       _remove_stopwords( $operand, $index );
 989                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 990                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 991                       if ( $stopwords_removed && $DEBUG );
 992                 }
 993
 994                 # Detect Truncation
 995                 my $truncated_operand;
 996                 my( $nontruncated, $righttruncated, $lefttruncated,
 997                     $rightlefttruncated, $regexpr
 998                 ) = _detect_truncation( $operand, $index );
 999                 warn
1000 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1001                   if $DEBUG;
1002
1003                 # Apply Truncation
1004                 if (
1005                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1006                     scalar(@$rightlefttruncated) > 0 )
1007                 {
1008
1009                # Don't field weight or add the index to the query, we do it here
1010                     $indexes_set = 1;
1011                     undef $weight_fields;
1012                     my $previous_truncation_operand;
1013                     if ( scalar(@$nontruncated) > 0 ) {
1014                         $truncated_operand .= "$index_plus @$nontruncated ";
1015                         $previous_truncation_operand = 1;
1016                     }
1017                     if ( scalar(@$righttruncated) > 0 ) {
1018                         $truncated_operand .= "and "
1019                           if $previous_truncation_operand;
1020                         $truncated_operand .=
1021                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1022                         $previous_truncation_operand = 1;
1023                     }
1024                     if ( scalar(@$lefttruncated) > 0 ) {
1025                         $truncated_operand .= "and "
1026                           if $previous_truncation_operand;
1027                         $truncated_operand .=
1028                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1029                         $previous_truncation_operand = 1;
1030                     }
1031                     if ( scalar(@$rightlefttruncated) > 0 ) {
1032                         $truncated_operand .= "and "
1033                           if $previous_truncation_operand;
1034                         $truncated_operand .=
1035                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1036                         $previous_truncation_operand = 1;
1037                     }
1038                 }
1039                 $operand = $truncated_operand if $truncated_operand;
1040                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1041
1042                 # Handle Stemming
1043                 my $stemmed_operand;
1044                 $stemmed_operand = _build_stemmed_operand($operand)
1045                   if $stemming;
1046                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1047
1048                 # Handle Field Weighting
1049                 my $weighted_operand;
1050                 $weighted_operand =
1051                   _build_weighted_query( $operand, $stemmed_operand, $index )
1052                   if $weight_fields;
1053                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1054                 $operand = $weighted_operand if $weight_fields;
1055                 $indexes_set = 1 if $weight_fields;
1056
1057                 # If there's a previous operand, we need to add an operator
1058                 if ($previous_operand) {
1059
1060                     # User-specified operator
1061                     if ( $operators[ $i - 1 ] ) {
1062                         $query     .= " $operators[$i-1] ";
1063                         $query     .= " $index_plus " unless $indexes_set;
1064                         $query     .= " $operand";
1065                         $query_cgi .= "&op=$operators[$i-1]";
1066                         $query_cgi .= "&idx=$index" if $index;
1067                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1068                         $query_desc .=
1069                           " $operators[$i-1] $index_plus $operands[$i]";
1070                     }
1071
1072                     # Default operator is and
1073                     else {
1074                         $query      .= " and ";
1075                         $query      .= "$index_plus " unless $indexes_set;
1076                         $query      .= "$operand";
1077                         $query_cgi  .= "&op=and&idx=$index" if $index;
1078                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1079                         $query_desc .= " and $index_plus $operands[$i]";
1080                     }
1081                 }
1082
1083                 # There isn't a pervious operand, don't need an operator
1084                 else {
1085
1086                     # Field-weighted queries already have indexes set
1087                     $query .= " $index_plus " unless $indexes_set;
1088                     $query .= $operand;
1089                     $query_desc .= " $index_plus $operands[$i]";
1090                     $query_cgi  .= "&idx=$index" if $index;
1091                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1092                     $previous_operand = 1;
1093                 }
1094             }    #/if $operands
1095         }    # /for
1096     }
1097     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1098
1099     # add limits
1100     my $group_OR_limits;
1101     my $availability_limit;
1102     foreach my $this_limit (@limits) {
1103         if ( $this_limit =~ /available/ ) {
1104
1105 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1106 # In English:
1107 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1108             $availability_limit .=
1109 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1110             $limit_cgi  .= "&limit=available";
1111             $limit_desc .= "";
1112         }
1113
1114         # group_OR_limits, prefixed by mc-
1115         # OR every member of the group
1116         elsif ( $this_limit =~ /mc/ ) {
1117             $group_OR_limits .= " or " if $group_OR_limits;
1118             $limit_desc      .= " or " if $group_OR_limits;
1119             $group_OR_limits .= "$this_limit";
1120             $limit_cgi       .= "&limit=$this_limit";
1121             $limit_desc      .= " $this_limit";
1122         }
1123
1124         # Regular old limits
1125         else {
1126             $limit .= " and " if $limit || $query;
1127             $limit      .= "$this_limit";
1128             $limit_cgi  .= "&limit=$this_limit";
1129             if ($this_limit =~ /^branch:(.+)/) {
1130                 my $branchcode = $1;
1131                 my $branchname = GetBranchName($branchcode);
1132                 if (defined $branchname) {
1133                     $limit_desc .= " branch:$branchname";
1134                 } else {
1135                     $limit_desc .= " $this_limit";
1136                 }
1137             } else {
1138                 $limit_desc .= " $this_limit";
1139             }
1140         }
1141     }
1142     if ($group_OR_limits) {
1143         $limit .= " and " if ( $query || $limit );
1144         $limit .= "($group_OR_limits)";
1145     }
1146     if ($availability_limit) {
1147         $limit .= " and " if ( $query || $limit );
1148         $limit .= "($availability_limit)";
1149     }
1150
1151     # Normalize the query and limit strings
1152     $query =~ s/:/=/g;
1153     $limit =~ s/:/=/g;
1154     for ( $query, $query_desc, $limit, $limit_desc ) {
1155         $_ =~ s/  / /g;    # remove extra spaces
1156         $_ =~ s/^ //g;     # remove any beginning spaces
1157         $_ =~ s/ $//g;     # remove any ending spaces
1158         $_ =~ s/==/=/g;    # remove double == from query
1159     }
1160     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1161
1162     for ($query_cgi,$simple_query) {
1163         $_ =~ s/"//g;
1164     }
1165     # append the limit to the query
1166     $query .= " " . $limit;
1167
1168     # Warnings if DEBUG
1169     if ($DEBUG) {
1170         warn "QUERY:" . $query;
1171         warn "QUERY CGI:" . $query_cgi;
1172         warn "QUERY DESC:" . $query_desc;
1173         warn "LIMIT:" . $limit;
1174         warn "LIMIT CGI:" . $limit_cgi;
1175         warn "LIMIT DESC:" . $limit_desc;
1176         warn "---------\nLeave buildQuery\n---------";
1177     }
1178     return (
1179         undef,              $query, $simple_query, $query_cgi,
1180         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1181         $stopwords_removed, $query_type
1182     );
1183 }
1184
1185 =head2 searchResults
1186
1187 Format results in a form suitable for passing to the template
1188
1189 =cut
1190
1191 # IMO this subroutine is pretty messy still -- it's responsible for
1192 # building the HTML output for the template
1193 sub searchResults {
1194     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1195     my $dbh = C4::Context->dbh;
1196     my $even = 1;
1197     my @newresults;
1198
1199     # add search-term highlighting via <span>s on the search terms
1200     my $span_terms_hashref;
1201     for my $span_term ( split( / /, $searchdesc ) ) {
1202         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1203         $span_terms_hashref->{$span_term}++;
1204     }
1205
1206     #Build branchnames hash
1207     #find branchname
1208     #get branch information.....
1209     my %branches;
1210     my $bsth =
1211       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1212       ;    # FIXME : use C4::Koha::GetBranches
1213     $bsth->execute();
1214     while ( my $bdata = $bsth->fetchrow_hashref ) {
1215         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1216     }
1217 # FIXME - We build an authorised values hash here, using the default framework
1218 # though it is possible to have different authvals for different fws.
1219
1220     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1221
1222     # get notforloan authorised value list (see $shelflocations  FIXME)
1223     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1224
1225     #Build itemtype hash
1226     #find itemtype & itemtype image
1227     my %itemtypes;
1228     $bsth =
1229       $dbh->prepare(
1230         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1231       );
1232     $bsth->execute();
1233     while ( my $bdata = $bsth->fetchrow_hashref ) {
1234                 foreach (qw(description imageurl summary notforloan)) {
1235                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1236                 }
1237     }
1238
1239     #search item field code
1240     my $sth =
1241       $dbh->prepare(
1242 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1243       );
1244     $sth->execute;
1245     my ($itemtag) = $sth->fetchrow;
1246
1247     ## find column names of items related to MARC
1248     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1249     $sth2->execute;
1250     my %subfieldstosearch;
1251     while ( ( my $column ) = $sth2->fetchrow ) {
1252         my ( $tagfield, $tagsubfield ) =
1253           &GetMarcFromKohaField( "items." . $column, "" );
1254         $subfieldstosearch{$column} = $tagsubfield;
1255     }
1256
1257     # handle which records to actually retrieve
1258     my $times;
1259     if ( $hits && $offset + $results_per_page <= $hits ) {
1260         $times = $offset + $results_per_page;
1261     }
1262     else {
1263         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1264     }
1265
1266     # loop through all of the records we've retrieved
1267     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1268         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1269         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1270         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1271         $oldbiblio->{result_number} = $i + 1;
1272
1273         # add imageurl to itemtype if there is one
1274         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1275
1276         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1277         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1278         $aisbn =~ s/-//g;
1279         $oldbiblio->{amazonisbn} = $aisbn;
1280                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1281  # Build summary if there is one (the summary is defined in the itemtypes table)
1282  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1283         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1284             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1285             my @fields  = $marcrecord->fields();
1286             foreach my $field (@fields) {
1287                 my $tag      = $field->tag();
1288                 my $tagvalue = $field->as_string();
1289                 $summary =~
1290                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1291                 unless ( $tag < 10 ) {
1292                     my @subf = $field->subfields;
1293                     for my $i ( 0 .. $#subf ) {
1294                         my $subfieldcode  = $subf[$i][0];
1295                         my $subfieldvalue = $subf[$i][1];
1296                         my $tagsubf       = $tag . $subfieldcode;
1297                         $summary =~
1298 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1299                     }
1300                 }
1301             }
1302             # FIXME: yuk
1303             $summary =~ s/\[(.*?)]//g;
1304             $summary =~ s/\n/<br\/>/g;
1305             $oldbiblio->{summary} = $summary;
1306         }
1307
1308         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1309         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1310         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1311         # Add search-term highlighting to the whole record where they match using <span>s
1312         if (C4::Context->preference("OpacHighlightedWords")){
1313             my $searchhighlightblob;
1314             for my $highlight_field ( $marcrecord->fields ) {
1315
1316     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1317                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1318                 for my $subfield ($highlight_field->subfields()) {
1319                     my $match;
1320                     next if $subfield->[0] eq '9';
1321                     my $field = $subfield->[1];
1322                     for my $term ( keys %$span_terms_hashref ) {
1323                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1324                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1325                         $match++;
1326                         }
1327                     }
1328                     $searchhighlightblob .= $field . " ... " if $match;
1329                 }
1330
1331             }
1332             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1333             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1334         }
1335
1336         # Add search-term highlighting to the title, subtitle, etc. fields
1337         for my $term ( keys %$span_terms_hashref ) {
1338             my $old_term = $term;
1339             if ( length($term) > 3 ) {
1340                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1341                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1342                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1343                                 }
1344             }
1345         }
1346
1347         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1348
1349         # Pull out the items fields
1350         my @fields = $marcrecord->field($itemtag);
1351
1352         # Setting item statuses for display
1353         my @available_items_loop;
1354         my @onloan_items_loop;
1355         my @other_items_loop;
1356
1357         my $available_items;
1358         my $onloan_items;
1359         my $other_items;
1360
1361         my $ordered_count         = 0;
1362         my $available_count       = 0;
1363         my $onloan_count          = 0;
1364         my $longoverdue_count     = 0;
1365         my $other_count           = 0;
1366         my $wthdrawn_count        = 0;
1367         my $itemlost_count        = 0;
1368         my $itembinding_count     = 0;
1369         my $itemdamaged_count     = 0;
1370         my $item_in_transit_count = 0;
1371         my $can_place_holds       = 0;
1372         my $items_count           = scalar(@fields);
1373         my $maxitems =
1374           ( C4::Context->preference('maxItemsinSearchResults') )
1375           ? C4::Context->preference('maxItemsinSearchResults') - 1
1376           : 1;
1377
1378         # loop through every item
1379         foreach my $field (@fields) {
1380             my $item;
1381
1382             # populate the items hash
1383             foreach my $code ( keys %subfieldstosearch ) {
1384                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1385             }
1386                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1387                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1388             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1389             if ($item->{$hbranch}) {
1390                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1391             }
1392             elsif ($item->{$otherbranch}) {     # Last resort
1393                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1394             }
1395
1396                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1397 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1398             if ( $item->{onloan} ) {
1399                 $onloan_count++;
1400                                 my $key = $prefix . $item->{due_date};
1401                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1402                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1403                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1404                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1405                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1406                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1407                 # if something's checked out and lost, mark it as 'long overdue'
1408                 if ( $item->{itemlost} ) {
1409                     $onloan_items->{$prefix}->{longoverdue}++;
1410                     $longoverdue_count++;
1411                 } else {        # can place holds as long as item isn't lost
1412                     $can_place_holds = 1;
1413                 }
1414             }
1415
1416          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1417             else {
1418
1419                 # item is on order
1420                 if ( $item->{notforloan} == -1 ) {
1421                     $ordered_count++;
1422                 }
1423
1424                 # is item in transit?
1425                 my $transfertwhen = '';
1426                 my ($transfertfrom, $transfertto);
1427
1428                 unless ($item->{wthdrawn}
1429                         || $item->{itemlost}
1430                         || $item->{damaged}
1431                         || $item->{notforloan}
1432                         || $items_count > 20) {
1433
1434                     # A couple heuristics to limit how many times
1435                     # we query the database for item transfer information, sacrificing
1436                     # accuracy in some cases for speed;
1437                     #
1438                     # 1. don't query if item has one of the other statuses
1439                     # 2. don't check transit status if the bib has
1440                     #    more than 20 items
1441                     #
1442                     # FIXME: to avoid having the query the database like this, and to make
1443                     #        the in transit status count as unavailable for search limiting,
1444                     #        should map transit status to record indexed in Zebra.
1445                     #
1446                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1447                 }
1448
1449                 # item is withdrawn, lost or damaged
1450                 if (   $item->{wthdrawn}
1451                     || $item->{itemlost}
1452                     || $item->{damaged}
1453                     || $item->{notforloan}
1454                     || ($transfertwhen ne ''))
1455                 {
1456                     $wthdrawn_count++        if $item->{wthdrawn};
1457                     $itemlost_count++        if $item->{itemlost};
1458                     $itemdamaged_count++     if $item->{damaged};
1459                     $item_in_transit_count++ if $transfertwhen ne '';
1460                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1461                     $other_count++;
1462
1463                                         my $key = $prefix . $item->{status};
1464                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1465                         $other_items->{$key}->{$_} = $item->{$_};
1466                                         }
1467                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1468                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1469                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1470                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1471                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1472                 }
1473                 # item is available
1474                 else {
1475                     $can_place_holds = 1;
1476                     $available_count++;
1477                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1478                                         foreach (qw(branchname itemcallnumber)) {
1479                         $available_items->{$prefix}->{$_} = $item->{$_};
1480                                         }
1481                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1482                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1483                 }
1484             }
1485         }    # notforloan, item level and biblioitem level
1486         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1487         $maxitems =
1488           ( C4::Context->preference('maxItemsinSearchResults') )
1489           ? C4::Context->preference('maxItemsinSearchResults') - 1
1490           : 1;
1491         for my $key ( sort keys %$onloan_items ) {
1492             (++$onloanitemscount > $maxitems) and last;
1493             push @onloan_items_loop, $onloan_items->{$key};
1494         }
1495         for my $key ( sort keys %$other_items ) {
1496             (++$otheritemscount > $maxitems) and last;
1497             push @other_items_loop, $other_items->{$key};
1498         }
1499         for my $key ( sort keys %$available_items ) {
1500             (++$availableitemscount > $maxitems) and last;
1501             push @available_items_loop, $available_items->{$key}
1502         }
1503
1504         # XSLT processing of some stuff
1505         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1506             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1507             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1508         }
1509
1510         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1511         $can_place_holds = 0
1512           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1513         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1514         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1515         $oldbiblio->{items_count}          = $items_count;
1516         $oldbiblio->{available_items_loop} = \@available_items_loop;
1517         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1518         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1519         $oldbiblio->{availablecount}       = $available_count;
1520         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1521         $oldbiblio->{onloancount}          = $onloan_count;
1522         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1523         $oldbiblio->{othercount}           = $other_count;
1524         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1525         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1526         $oldbiblio->{itemlostcount}        = $itemlost_count;
1527         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1528         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1529         $oldbiblio->{orderedcount}         = $ordered_count;
1530         $oldbiblio->{isbn} =~
1531           s/-//g;    # deleting - in isbn to enable amazon content
1532         push( @newresults, $oldbiblio );
1533     }
1534     return @newresults;
1535 }
1536
1537 #----------------------------------------------------------------------
1538 #
1539 # Non-Zebra GetRecords#
1540 #----------------------------------------------------------------------
1541
1542 =head2 NZgetRecords
1543
1544   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1545
1546 =cut
1547
1548 sub NZgetRecords {
1549     my (
1550         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1551         $results_per_page, $offset,       $expanded_facet, $branches,
1552         $query_type,       $scan
1553     ) = @_;
1554     warn "query =$query" if $DEBUG;
1555     my $result = NZanalyse($query);
1556     warn "results =$result" if $DEBUG;
1557     return ( undef,
1558         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1559         undef );
1560 }
1561
1562 =head2 NZanalyse
1563
1564   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1565   the list is built from an inverted index in the nozebra SQL table
1566   note that title is here only for convenience : the sorting will be very fast when requested on title
1567   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1568
1569 =cut
1570
1571 sub NZanalyse {
1572     my ( $string, $server ) = @_;
1573 #     warn "---------"       if $DEBUG;
1574     warn " NZanalyse" if $DEBUG;
1575 #     warn "---------"       if $DEBUG;
1576
1577  # $server contains biblioserver or authorities, depending on what we search on.
1578  #warn "querying : $string on $server";
1579     $server = 'biblioserver' unless $server;
1580
1581 # if we have a ", replace the content to discard temporarily any and/or/not inside
1582     my $commacontent;
1583     if ( $string =~ /"/ ) {
1584         $string =~ s/"(.*?)"/__X__/;
1585         $commacontent = $1;
1586         warn "commacontent : $commacontent" if $DEBUG;
1587     }
1588
1589 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1590 # then, call again NZanalyse with $left and $right
1591 # (recursive until we find a leaf (=> something without and/or/not)
1592 # delete repeated operator... Would then go in infinite loop
1593     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1594     }
1595
1596     #process parenthesis before.
1597     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1598         my $left     = $1;
1599         my $right    = $4;
1600         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1601         warn
1602 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1603           if $DEBUG;
1604         my $leftresult = NZanalyse( $left, $server );
1605         if ($operator) {
1606             my $rightresult = NZanalyse( $right, $server );
1607
1608             # OK, we have the results for right and left part of the query
1609             # depending of operand, intersect, union or exclude both lists
1610             # to get a result list
1611             if ( $operator eq ' and ' ) {
1612                 return NZoperatorAND($leftresult,$rightresult);
1613             }
1614             elsif ( $operator eq ' or ' ) {
1615
1616                 # just merge the 2 strings
1617                 return $leftresult . $rightresult;
1618             }
1619             elsif ( $operator eq ' not ' ) {
1620                 return NZoperatorNOT($leftresult,$rightresult);
1621             }
1622         }
1623         else {
1624 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1625             return $leftresult;
1626         }
1627     }
1628     warn "string :" . $string if $DEBUG;
1629     my $left = "";
1630     my $right = "";
1631     my $operator = "";
1632     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1633         $left     = $1;
1634         $right    = $3;
1635         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1636     }
1637     warn "no parenthesis. left : $left operator: $operator right: $right"
1638       if $DEBUG;
1639
1640     # it's not a leaf, we have a and/or/not
1641     if ($operator) {
1642
1643         # reintroduce comma content if needed
1644         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1645         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1646         warn "node : $left / $operator / $right\n" if $DEBUG;
1647         my $leftresult  = NZanalyse( $left,  $server );
1648         my $rightresult = NZanalyse( $right, $server );
1649         warn " leftresult : $leftresult" if $DEBUG;
1650         warn " rightresult : $rightresult" if $DEBUG;
1651         # OK, we have the results for right and left part of the query
1652         # depending of operand, intersect, union or exclude both lists
1653         # to get a result list
1654         if ( $operator eq ' and ' ) {
1655             warn "NZAND";
1656             return NZoperatorAND($leftresult,$rightresult);
1657         }
1658         elsif ( $operator eq ' or ' ) {
1659
1660             # just merge the 2 strings
1661             return $leftresult . $rightresult;
1662         }
1663         elsif ( $operator eq ' not ' ) {
1664             return NZoperatorNOT($leftresult,$rightresult);
1665         }
1666         else {
1667
1668 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1669             die "error : operand unknown : $operator for $string";
1670         }
1671
1672         # it's a leaf, do the real SQL query and return the result
1673     }
1674     else {
1675         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1676         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1677         #remove trailing blank at the beginning
1678         $string =~ s/^ //g;
1679         warn "leaf:$string" if $DEBUG;
1680
1681         # parse the string in in operator/operand/value again
1682         my $left = "";
1683         my $operator = "";
1684         my $right = "";
1685         if ($string =~ /(.*)(>=|<=)(.*)/) {
1686             $left     = $1;
1687             $operator = $2;
1688             $right    = $3;
1689         } else {
1690             $left = $string;
1691         }
1692 #         warn "handling leaf... left:$left operator:$operator right:$right"
1693 #           if $DEBUG;
1694         unless ($operator) {
1695             if ($string =~ /(.*)(>|<|=)(.*)/) {
1696                 $left     = $1;
1697                 $operator = $2;
1698                 $right    = $3;
1699                 warn
1700     "handling unless (operator)... left:$left operator:$operator right:$right"
1701                 if $DEBUG;
1702             } else {
1703                 $left = $string;
1704             }
1705         }
1706         my $results;
1707
1708 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1709         $left =~ s/ .*$//;
1710
1711         # automatic replace for short operators
1712         $left = 'title'            if $left =~ '^ti$';
1713         $left = 'author'           if $left =~ '^au$';
1714         $left = 'publisher'        if $left =~ '^pb$';
1715         $left = 'subject'          if $left =~ '^su$';
1716         $left = 'koha-Auth-Number' if $left =~ '^an$';
1717         $left = 'keyword'          if $left =~ '^kw$';
1718         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1719         if ( $operator && $left ne 'keyword' ) {
1720
1721             #do a specific search
1722             my $dbh = C4::Context->dbh;
1723             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1724             my $sth =
1725               $dbh->prepare(
1726 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1727               );
1728             warn "$left / $operator / $right\n" if $DEBUG;
1729
1730             # split each word, query the DB and build the biblionumbers result
1731             #sanitizing leftpart
1732             $left =~ s/^\s+|\s+$//;
1733             foreach ( split / /, $right ) {
1734                 my $biblionumbers;
1735                 $_ =~ s/^\s+|\s+$//;
1736                 next unless $_;
1737                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1738                 $sth->execute( $server, $left, $_ )
1739                   or warn "execute failed: $!";
1740                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1741
1742 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1743 # otherwise, fill the result
1744                     $biblionumbers .= $line
1745                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1746                     warn "result : $value "
1747                       . ( $right  =~ /\d/ ) . "=="
1748                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1749                 }
1750
1751 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1752                 if ($results) {
1753                     warn "NZAND" if $DEBUG;
1754                     $results = NZoperatorAND($biblionumbers,$results);
1755                 }
1756                 else {
1757                     $results = $biblionumbers;
1758                 }
1759             }
1760         }
1761         else {
1762
1763       #do a complete search (all indexes), if index='kw' do complete search too.
1764             my $dbh = C4::Context->dbh;
1765             my $sth =
1766               $dbh->prepare(
1767 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1768               );
1769
1770             # split each word, query the DB and build the biblionumbers result
1771             foreach ( split / /, $string ) {
1772                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1773                 warn "search on all indexes on $_" if $DEBUG;
1774                 my $biblionumbers;
1775                 next unless $_;
1776                 $sth->execute( $server, $_ );
1777                 while ( my $line = $sth->fetchrow ) {
1778                     $biblionumbers .= $line;
1779                 }
1780
1781 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1782                 if ($results) {
1783                     $results = NZoperatorAND($biblionumbers,$results);
1784                 }
1785                 else {
1786                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1787                     $results = $biblionumbers;
1788                 }
1789             }
1790         }
1791         warn "return : $results for LEAF : $string" if $DEBUG;
1792         return $results;
1793     }
1794     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1795 }
1796
1797 sub NZoperatorAND{
1798     my ($rightresult, $leftresult)=@_;
1799
1800     my @leftresult = split /;/, $leftresult;
1801     warn " @leftresult / $rightresult \n" if $DEBUG;
1802
1803     #             my @rightresult = split /;/,$leftresult;
1804     my $finalresult;
1805
1806 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1807 # the result is stored twice, to have the same weight for AND than OR.
1808 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1809 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1810     foreach (@leftresult) {
1811         my $value = $_;
1812         my $countvalue;
1813         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1814         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1815             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1816             $finalresult .=
1817                 "$value-$countvalue;$value-$countvalue;";
1818         }
1819     }
1820     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1821     return $finalresult;
1822 }
1823
1824 sub NZoperatorOR{
1825     my ($rightresult, $leftresult)=@_;
1826     return $rightresult.$leftresult;
1827 }
1828
1829 sub NZoperatorNOT{
1830     my ($leftresult, $rightresult)=@_;
1831
1832     my @leftresult = split /;/, $leftresult;
1833
1834     #             my @rightresult = split /;/,$leftresult;
1835     my $finalresult;
1836     foreach (@leftresult) {
1837         my $value=$_;
1838         $value=$1 if $value=~m/(.*)-\d+$/;
1839         unless ($rightresult =~ "$value-") {
1840             $finalresult .= "$_;";
1841         }
1842     }
1843     return $finalresult;
1844 }
1845
1846 =head2 NZorder
1847
1848   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1849
1850   TODO :: Description
1851
1852 =cut
1853
1854 sub NZorder {
1855     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1856     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1857
1858     # order title asc by default
1859     #     $ordering = '1=36 <i' unless $ordering;
1860     $results_per_page = 20 unless $results_per_page;
1861     $offset           = 0  unless $offset;
1862     my $dbh = C4::Context->dbh;
1863
1864     #
1865     # order by POPULARITY
1866     #
1867     if ( $ordering =~ /popularity/ ) {
1868         my %result;
1869         my %popularity;
1870
1871         # popularity is not in MARC record, it's builded from a specific query
1872         my $sth =
1873           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1874         foreach ( split /;/, $biblionumbers ) {
1875             my ( $biblionumber, $title ) = split /,/, $_;
1876             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1877             $sth->execute($biblionumber);
1878             my $popularity = $sth->fetchrow || 0;
1879
1880 # hint : the key is popularity.title because we can have
1881 # many results with the same popularity. In this cas, sub-ordering is done by title
1882 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1883 # (un-frequent, I agree, but we won't forget anything that way ;-)
1884             $popularity{ sprintf( "%10d", $popularity ) . $title
1885                   . $biblionumber } = $biblionumber;
1886         }
1887
1888     # sort the hash and return the same structure as GetRecords (Zebra querying)
1889         my $result_hash;
1890         my $numbers = 0;
1891         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1892             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1893                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1894                   $result{ $popularity{$key} }->as_usmarc();
1895             }
1896         }
1897         else {                                    # sort popularity ASC
1898             foreach my $key ( sort ( keys %popularity ) ) {
1899                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1900                   $result{ $popularity{$key} }->as_usmarc();
1901             }
1902         }
1903         my $finalresult = ();
1904         $result_hash->{'hits'}         = $numbers;
1905         $finalresult->{'biblioserver'} = $result_hash;
1906         return $finalresult;
1907
1908         #
1909         # ORDER BY author
1910         #
1911     }
1912     elsif ( $ordering =~ /author/ ) {
1913         my %result;
1914         foreach ( split /;/, $biblionumbers ) {
1915             my ( $biblionumber, $title ) = split /,/, $_;
1916             my $record = GetMarcBiblio($biblionumber);
1917             my $author;
1918             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1919                 $author = $record->subfield( '200', 'f' );
1920                 $author = $record->subfield( '700', 'a' ) unless $author;
1921             }
1922             else {
1923                 $author = $record->subfield( '100', 'a' );
1924             }
1925
1926 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1927 # and we don't want to get only 1 result for each of them !!!
1928             $result{ $author . $biblionumber } = $record;
1929         }
1930
1931     # sort the hash and return the same structure as GetRecords (Zebra querying)
1932         my $result_hash;
1933         my $numbers = 0;
1934         if ( $ordering eq 'author_za' ) {    # sort by author desc
1935             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1936                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1937                   $result{$key}->as_usmarc();
1938             }
1939         }
1940         else {                               # sort by author ASC
1941             foreach my $key ( sort ( keys %result ) ) {
1942                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1943                   $result{$key}->as_usmarc();
1944             }
1945         }
1946         my $finalresult = ();
1947         $result_hash->{'hits'}         = $numbers;
1948         $finalresult->{'biblioserver'} = $result_hash;
1949         return $finalresult;
1950
1951         #
1952         # ORDER BY callnumber
1953         #
1954     }
1955     elsif ( $ordering =~ /callnumber/ ) {
1956         my %result;
1957         foreach ( split /;/, $biblionumbers ) {
1958             my ( $biblionumber, $title ) = split /,/, $_;
1959             my $record = GetMarcBiblio($biblionumber);
1960             my $callnumber;
1961             my $frameworkcode = GetFrameworkCode($biblionumber);
1962             my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField(  'items.itemcallnumber', $frameworkcode);
1963                ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
1964                 unless $callnumber_tag;
1965             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1966                 $callnumber = $record->subfield( '200', 'f' );
1967             } else {
1968                 $callnumber = $record->subfield( '100', 'a' );
1969             }
1970
1971 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1972 # and we don't want to get only 1 result for each of them !!!
1973             $result{ $callnumber . $biblionumber } = $record;
1974         }
1975
1976     # sort the hash and return the same structure as GetRecords (Zebra querying)
1977         my $result_hash;
1978         my $numbers = 0;
1979         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1980             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1981                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1982                   $result{$key}->as_usmarc();
1983             }
1984         }
1985         else {                                     # sort by title ASC
1986             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1987                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1988                   $result{$key}->as_usmarc();
1989             }
1990         }
1991         my $finalresult = ();
1992         $result_hash->{'hits'}         = $numbers;
1993         $finalresult->{'biblioserver'} = $result_hash;
1994         return $finalresult;
1995     }
1996     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1997         my %result;
1998         foreach ( split /;/, $biblionumbers ) {
1999             my ( $biblionumber, $title ) = split /,/, $_;
2000             my $record = GetMarcBiblio($biblionumber);
2001             my ( $publicationyear_tag, $publicationyear_subfield ) =
2002               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2003             my $publicationyear =
2004               $record->subfield( $publicationyear_tag,
2005                 $publicationyear_subfield );
2006
2007 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2008 # and we don't want to get only 1 result for each of them !!!
2009             $result{ $publicationyear . $biblionumber } = $record;
2010         }
2011
2012     # sort the hash and return the same structure as GetRecords (Zebra querying)
2013         my $result_hash;
2014         my $numbers = 0;
2015         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2016             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2017                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2018                   $result{$key}->as_usmarc();
2019             }
2020         }
2021         else {                                 # sort by pub year ASC
2022             foreach my $key ( sort ( keys %result ) ) {
2023                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2024                   $result{$key}->as_usmarc();
2025             }
2026         }
2027         my $finalresult = ();
2028         $result_hash->{'hits'}         = $numbers;
2029         $finalresult->{'biblioserver'} = $result_hash;
2030         return $finalresult;
2031
2032         #
2033         # ORDER BY title
2034         #
2035     }
2036     elsif ( $ordering =~ /title/ ) {
2037
2038 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2039         my %result;
2040         foreach ( split /;/, $biblionumbers ) {
2041             my ( $biblionumber, $title ) = split /,/, $_;
2042
2043 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2044 # and we don't want to get only 1 result for each of them !!!
2045 # hint & speed improvement : we can order without reading the record
2046 # so order, and read records only for the requested page !
2047             $result{ $title . $biblionumber } = $biblionumber;
2048         }
2049
2050     # sort the hash and return the same structure as GetRecords (Zebra querying)
2051         my $result_hash;
2052         my $numbers = 0;
2053         if ( $ordering eq 'title_az' ) {    # sort by title desc
2054             foreach my $key ( sort ( keys %result ) ) {
2055                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2056             }
2057         }
2058         else {                              # sort by title ASC
2059             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2060                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2061             }
2062         }
2063
2064         # limit the $results_per_page to result size if it's more
2065         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2066
2067         # for the requested page, replace biblionumber by the complete record
2068         # speed improvement : avoid reading too much things
2069         for (
2070             my $counter = $offset ;
2071             $counter <= $offset + $results_per_page ;
2072             $counter++
2073           )
2074         {
2075             $result_hash->{'RECORDS'}[$counter] =
2076               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2077         }
2078         my $finalresult = ();
2079         $result_hash->{'hits'}         = $numbers;
2080         $finalresult->{'biblioserver'} = $result_hash;
2081         return $finalresult;
2082     }
2083     else {
2084
2085 #
2086 # order by ranking
2087 #
2088 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2089         my %result;
2090         my %count_ranking;
2091         foreach ( split /;/, $biblionumbers ) {
2092             my ( $biblionumber, $title ) = split /,/, $_;
2093             $title =~ /(.*)-(\d)/;
2094
2095             # get weight
2096             my $ranking = $2;
2097
2098 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2099 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2100 # biblio N has ranking = 6
2101             $count_ranking{$biblionumber} += $ranking;
2102         }
2103
2104 # build the result by "inverting" the count_ranking hash
2105 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2106 #         warn "counting";
2107         foreach ( keys %count_ranking ) {
2108             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2109         }
2110
2111     # sort the hash and return the same structure as GetRecords (Zebra querying)
2112         my $result_hash;
2113         my $numbers = 0;
2114         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2115             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2116         }
2117
2118         # limit the $results_per_page to result size if it's more
2119         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2120
2121         # for the requested page, replace biblionumber by the complete record
2122         # speed improvement : avoid reading too much things
2123         for (
2124             my $counter = $offset ;
2125             $counter <= $offset + $results_per_page ;
2126             $counter++
2127           )
2128         {
2129             $result_hash->{'RECORDS'}[$counter] =
2130               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2131               if $result_hash->{'RECORDS'}[$counter];
2132         }
2133         my $finalresult = ();
2134         $result_hash->{'hits'}         = $numbers;
2135         $finalresult->{'biblioserver'} = $result_hash;
2136         return $finalresult;
2137     }
2138 }
2139
2140 =head2 ModBiblios
2141
2142 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2143
2144 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2145 test parameter if set donot perform change to records in database.
2146
2147 =over 2
2148
2149 =item C<input arg:>
2150
2151     * $listbiblios is an array ref to marcrecords to be changed
2152     * $tagsubfield is the reference of the subfield to change.
2153     * $initvalue is the value to search the record for
2154     * $targetvalue is the value to set the subfield to
2155     * $test is to be set only not to perform changes in database.
2156
2157 =item C<Output arg:>
2158     * $countchanged counts all the changes performed.
2159     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2160
2161 =item C<usage in the script:>
2162
2163 =back
2164
2165 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2166 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2167 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2168
2169 =cut
2170
2171 sub ModBiblios {
2172     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2173     my $countmatched;
2174     my @unmatched;
2175     my ( $tag, $subfield ) = ( $1, $2 )
2176       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2177     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2178         $tag = $tag . $subfield;
2179         undef $subfield;
2180     }
2181     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber', '');
2182     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber', '');
2183     if ($tag eq $itemtag) {
2184         # do not allow the embedded item tag to be
2185         # edited from here
2186         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2187         return (0, []);
2188     }
2189     foreach my $usmarc (@$listbiblios) {
2190         my $record;
2191         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2192         my $biblionumber;
2193         if ($@) {
2194
2195             # usmarc is not a valid usmarc May be a biblionumber
2196             # FIXME - sorry, please let's figure out whether
2197             #         this function is to be passed a list of
2198             #         record numbers or a list of MARC::Record
2199             #         objects.  The former is probably better
2200             #         because the MARC records supplied by Zebra
2201             #         may be not current.
2202             $record       = GetMarcBiblio($usmarc);
2203             $biblionumber = $usmarc;
2204         }
2205         else {
2206             if ( $bntag >= 010 ) {
2207                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2208             }
2209             else {
2210                 $biblionumber = $record->field($bntag)->data;
2211             }
2212         }
2213
2214         #GetBiblionumber is to be written.
2215         #Could be replaced by TransformMarcToKoha (But Would be longer)
2216         if ( $record->field($tag) ) {
2217             my $modify = 0;
2218             foreach my $field ( $record->field($tag) ) {
2219                 if ($subfield) {
2220                     if (
2221                         $field->delete_subfield(
2222                             'code'  => $subfield,
2223                             'match' => qr($initvalue)
2224                         )
2225                       )
2226                     {
2227                         $countmatched++;
2228                         $modify = 1;
2229                         $field->update( $subfield, $targetvalue )
2230                           if ($targetvalue);
2231                     }
2232                 }
2233                 else {
2234                     if ( $tag >= 010 ) {
2235                         if ( $field->delete_field($field) ) {
2236                             $countmatched++;
2237                             $modify = 1;
2238                         }
2239                     }
2240                     else {
2241                         $field->data = $targetvalue
2242                           if ( $field->data =~ qr($initvalue) );
2243                     }
2244                 }
2245             }
2246
2247             #       warn $record->as_formatted;
2248             if ($modify) {
2249                 ModBiblio( $record, $biblionumber,
2250                     GetFrameworkCode($biblionumber) )
2251                   unless ($test);
2252             }
2253             else {
2254                 push @unmatched, $biblionumber;
2255             }
2256         }
2257         else {
2258             push @unmatched, $biblionumber;
2259         }
2260     }
2261     return ( $countmatched, \@unmatched );
2262 }
2263
2264 END { }    # module clean-up code here (global destructor)
2265
2266 1;
2267 __END__
2268
2269 =head1 AUTHOR
2270
2271 Koha Developement team <info@koha.org>
2272
2273 =cut