C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Search::PazPar2;
  25 use XML::Simple;
  26 use C4::Dates qw(format_date);
  27 use C4::XSLT;
  28
  29 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  30
  31 # set the version for version checking
  32 BEGIN {
  33     $VERSION = 3.01;
  34     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  35 }
  36
  37 =head1 NAME
  38
  39 C4::Search - Functions for searching the Koha catalog.
  40
  41 =head1 SYNOPSIS
  42
  43 See opac/opac-search.pl or catalogue/search.pl for example of usage
  44
  45 =head1 DESCRIPTION
  46
  47 This module provides searching functions for Koha's bibliographic databases
  48
  49 =head1 FUNCTIONS
  50
  51 =cut
  52
  53 @ISA    = qw(Exporter);
  54 @EXPORT = qw(
  55   &findseealso
  56   &FindDuplicate
  57   &SimpleSearch
  58   &searchResults
  59   &getRecords
  60   &buildQuery
  61   &NZgetRecords
  62   &ModBiblios
  63 );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 findseealso($dbh,$fields);
  68
  69 C<$dbh> is a link to the DB handler.
  70
  71 use C4::Context;
  72 my $dbh =C4::Context->dbh;
  73
  74 C<$fields> is a reference to the fields array
  75
  76 This function modifies the @$fields array and adds related fields to search on.
  77
  78 FIXME: this function is probably deprecated in Koha 3
  79
  80 =cut
  81
  82 sub findseealso {
  83     my ( $dbh, $fields ) = @_;
  84     my $tagslib = GetMarcStructure(1);
  85     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  86         my ($tag)      = substr( @$fields[$i], 1, 3 );
  87         my ($subfield) = substr( @$fields[$i], 4, 1 );
  88         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  89           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  90     }
  91 }
  92
  93 =head2 FindDuplicate
  94
  95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  96
  97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  98
  99 =cut
 100
 101 sub FindDuplicate {
 102     my ($record) = @_;
 103     my $dbh = C4::Context->dbh;
 104     my $result = TransformMarcToKoha( $dbh, $record, '' );
 105     my $sth;
 106     my $query;
 107     my $search;
 108     my $type;
 109     my ( $biblionumber, $title );
 110
 111     # search duplicate on ISBN, easy and fast..
 112     # ... normalize first
 113     if ( $result->{isbn} ) {
 114         $result->{isbn} =~ s/\(.*$//;
 115         $result->{isbn} =~ s/\s+$//;
 116         $query = "isbn=$result->{isbn}";
 117     }
 118     else {
 119         $result->{title} =~ s /\\//g;
 120         $result->{title} =~ s /\"//g;
 121         $result->{title} =~ s /\(//g;
 122         $result->{title} =~ s /\)//g;
 123
 124         # FIXME: instead of removing operators, could just do
 125         # quotes around the value
 126         $result->{title} =~ s/(and|or|not)//g;
 127         $query = "ti,ext=$result->{title}";
 128         $query .= " and itemtype=$result->{itemtype}"
 129           if ( $result->{itemtype} );
 130         if   ( $result->{author} ) {
 131             $result->{author} =~ s /\\//g;
 132             $result->{author} =~ s /\"//g;
 133             $result->{author} =~ s /\(//g;
 134             $result->{author} =~ s /\)//g;
 135
 136             # remove valid operators
 137             $result->{author} =~ s/(and|or|not)//g;
 138             $query .= " and au,ext=$result->{author}";
 139         }
 140     }
 141
 142     # FIXME: add error handling
 143     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 144     my @results;
 145     foreach my $possible_duplicate_record (@$searchresults) {
 146         my $marcrecord =
 147           MARC::Record->new_from_usmarc($possible_duplicate_record);
 148         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 149
 150         # FIXME :: why 2 $biblionumber ?
 151         if ($result) {
 152             push @results, $result->{'biblionumber'};
 153             push @results, $result->{'title'};
 154         }
 155     }
 156     return @results;
 157 }
 158
 159 =head2 SimpleSearch
 160
 161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 162
 163 This function provides a simple search API on the bibliographic catalog
 164
 165 =over 2
 166
 167 =item C<input arg:>
 168
 169     * $query can be a simple keyword or a complete CCL query
 170     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 171     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 172     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 173
 174
 175 =item C<Output:>
 176
 177     * $error is a empty unless an error is detected
 178     * \@results is an array of records.
 179     * $total_hits is the number of hits that would have been returned with no limit
 180
 181 =item C<usage in the script:>
 182
 183 =back
 184
 185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 186
 187 if (defined $error) {
 188     $template->param(query_error => $error);
 189     warn "error: ".$error;
 190     output_html_with_http_headers $input, $cookie, $template->output;
 191     exit;
 192 }
 193
 194 my $hits = scalar @$marcresults;
 195 my @results;
 196
 197 for my $i (0..$hits) {
 198     my %resultsloop;
 199     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 200     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 201
 202     #build the hash for the template.
 203     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 204     $resultsloop{title}           = $biblio->{'title'};
 205     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 206     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 207     $resultsloop{author}          = $biblio->{'author'};
 208     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 209     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 210
 211     push @results, \%resultsloop;
 212 }
 213
 214 $template->param(result=>\@results);
 215
 216 =cut
 217
 218 sub SimpleSearch {
 219     my ( $query, $offset, $max_results, $servers )  = @_;
 220
 221     if ( C4::Context->preference('NoZebra') ) {
 222         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 223         my $search_result =
 224           (      $result->{hits}
 225               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 226         return ( undef, $search_result, scalar($search_result) );
 227     }
 228     else {
 229         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 230         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 231         my @results;
 232         my @zoom_queries;
 233         my @tmpresults;
 234         my @zconns;
 235         my $total_hits;
 236         return ( "No query entered", undef, undef ) unless $query;
 237
 238         # Initialize & Search Zebra
 239         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 240             eval {
 241                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 242                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 243                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 244
 245                 # error handling
 246                 my $error =
 247                     $zconns[$i]->errmsg() . " ("
 248                   . $zconns[$i]->errcode() . ") "
 249                   . $zconns[$i]->addinfo() . " "
 250                   . $zconns[$i]->diagset();
 251
 252                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 253             };
 254             if ($@) {
 255
 256                 # caught a ZOOM::Exception
 257                 my $error =
 258                     $@->message() . " ("
 259                   . $@->code() . ") "
 260                   . $@->addinfo() . " "
 261                   . $@->diagset();
 262                 warn $error;
 263                 return ( $error, undef, undef );
 264             }
 265         }
 266         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 267             my $event = $zconns[ $i - 1 ]->last_event();
 268             if ( $event == ZOOM::Event::ZEND ) {
 269
 270                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 271                 my $hits = $tmpresults[ $i - 1 ]->size();
 272                 $total_hits += $hits;
 273                 my $last_record = $hits;
 274                 if ( defined $max_results && $offset + $max_results < $hits ) {
 275                     $last_record  = $offset + $max_results;
 276                 }
 277
 278                 for my $j ( $first_record..$last_record ) {
 279                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 280                     push @results, $record;
 281                 }
 282             }
 283         }
 284
 285         foreach my $result (@tmpresults) {
 286             $result->destroy();
 287         }
 288         foreach my $zoom_query (@zoom_queries) {
 289             $zoom_query->destroy();
 290         }
 291
 292         return ( undef, \@results, $total_hits );
 293     }
 294 }
 295
 296 =head2 getRecords
 297
 298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 299
 300         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 301         $results_per_page, $offset,       $expanded_facet, $branches,
 302         $query_type,       $scan
 303     );
 304
 305 The all singing, all dancing, multi-server, asynchronous, scanning,
 306 searching, record nabbing, facet-building
 307
 308 See verbse embedded documentation.
 309
 310 =cut
 311
 312 sub getRecords {
 313     my (
 314         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 315         $results_per_page, $offset,       $expanded_facet, $branches,
 316         $query_type,       $scan
 317     ) = @_;
 318
 319     my @servers = @$servers_ref;
 320     my @sort_by = @$sort_by_ref;
 321
 322     # Initialize variables for the ZOOM connection and results object
 323     my $zconn;
 324     my @zconns;
 325     my @results;
 326     my $results_hashref = ();
 327
 328     # Initialize variables for the faceted results objects
 329     my $facets_counter = ();
 330     my $facets_info    = ();
 331     my $facets         = getFacets();
 332
 333     my @facets_loop
 334       ;    # stores the ref to array of hashes for template facets loop
 335
 336     ### LOOP THROUGH THE SERVERS
 337     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 338         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 339
 340 # perform the search, create the results objects
 341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 342         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 343
 344         #$query_to_use = $simple_query if $scan;
 345         warn $simple_query if ( $scan and $DEBUG );
 346
 347         # Check if we've got a query_type defined, if so, use it
 348         eval {
 349             if ($query_type)
 350             {
 351                 if ( $query_type =~ /^ccl/ ) {
 352                     $query_to_use =~
 353                       s/\:/\=/g;    # change : to = last minute (FIXME)
 354                     $results[$i] =
 355                       $zconns[$i]->search(
 356                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 357                       );
 358                 }
 359                 elsif ( $query_type =~ /^cql/ ) {
 360                     $results[$i] =
 361                       $zconns[$i]->search(
 362                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 363                 }
 364                 elsif ( $query_type =~ /^pqf/ ) {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 368                 }
 369             }
 370             else {
 371                 if ($scan) {
 372                     $results[$i] =
 373                       $zconns[$i]->scan(
 374                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 375                       );
 376                 }
 377                 else {
 378                     $results[$i] =
 379                       $zconns[$i]->search(
 380                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 381                       );
 382                 }
 383             }
 384         };
 385         if ($@) {
 386             warn "WARNING: query problem with $query_to_use " . $@;
 387         }
 388
 389         # Concatenate the sort_by limits and pass them to the results object
 390         # Note: sort will override rank
 391         my $sort_by;
 392         foreach my $sort (@sort_by) {
 393             if ( $sort eq "author_az" ) {
 394                 $sort_by .= "1=1003 <i ";
 395             }
 396             elsif ( $sort eq "author_za" ) {
 397                 $sort_by .= "1=1003 >i ";
 398             }
 399             elsif ( $sort eq "popularity_asc" ) {
 400                 $sort_by .= "1=9003 <i ";
 401             }
 402             elsif ( $sort eq "popularity_dsc" ) {
 403                 $sort_by .= "1=9003 >i ";
 404             }
 405             elsif ( $sort eq "call_number_asc" ) {
 406                 $sort_by .= "1=20  <i ";
 407             }
 408             elsif ( $sort eq "call_number_dsc" ) {
 409                 $sort_by .= "1=20 >i ";
 410             }
 411             elsif ( $sort eq "pubdate_asc" ) {
 412                 $sort_by .= "1=31 <i ";
 413             }
 414             elsif ( $sort eq "pubdate_dsc" ) {
 415                 $sort_by .= "1=31 >i ";
 416             }
 417             elsif ( $sort eq "acqdate_asc" ) {
 418                 $sort_by .= "1=32 <i ";
 419             }
 420             elsif ( $sort eq "acqdate_dsc" ) {
 421                 $sort_by .= "1=32 >i ";
 422             }
 423             elsif ( $sort eq "title_az" ) {
 424                 $sort_by .= "1=4 <i ";
 425             }
 426             elsif ( $sort eq "title_za" ) {
 427                 $sort_by .= "1=4 >i ";
 428             }
 429         }
 430         if ($sort_by) {
 431             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 432                 warn "WARNING sort $sort_by failed";
 433             }
 434         }
 435     }    # finished looping through servers
 436
 437     # The big moment: asynchronously retrieve results from all servers
 438     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 439         my $ev = $zconns[ $i - 1 ]->last_event();
 440         if ( $ev == ZOOM::Event::ZEND ) {
 441             next unless $results[ $i - 1 ];
 442             my $size = $results[ $i - 1 ]->size();
 443             if ( $size > 0 ) {
 444                 my $results_hash;
 445
 446                 # loop through the results
 447                 $results_hash->{'hits'} = $size;
 448                 my $times;
 449                 if ( $offset + $results_per_page <= $size ) {
 450                     $times = $offset + $results_per_page;
 451                 }
 452                 else {
 453                     $times = $size;
 454                 }
 455                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 456                     my $records_hash;
 457                     my $record;
 458                     my $facet_record;
 459
 460                     ## Check if it's an index scan
 461                     if ($scan) {
 462                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 463
 464                  # here we create a minimal MARC record and hand it off to the
 465                  # template just like a normal result ... perhaps not ideal, but
 466                  # it works for now
 467                         my $tmprecord = MARC::Record->new();
 468                         $tmprecord->encoding('UTF-8');
 469                         my $tmptitle;
 470                         my $tmpauthor;
 471
 472                 # the minimal record in author/title (depending on MARC flavour)
 473                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 474                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 475                             $tmprecord->append_fields($tmptitle);
 476                         } else {
 477                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 478                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 479                             $tmprecord->append_fields($tmptitle);
 480                             $tmprecord->append_fields($tmpauthor);
 481                         }
 482                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 483                     }
 484
 485                     # not an index scan
 486                     else {
 487                         $record = $results[ $i - 1 ]->record($j)->raw();
 488
 489                         # warn "RECORD $j:".$record;
 490                         $results_hash->{'RECORDS'}[$j] = $record;
 491
 492             # Fill the facets while we're looping, but only for the biblioserver
 493                         $facet_record = MARC::Record->new_from_usmarc($record)
 494                           if $servers[ $i - 1 ] =~ /biblioserver/;
 495
 496                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 497                         if ($facet_record) {
 498                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 499
 500                                 if ( $facets->[$k] ) {
 501                                     my @fields;
 502                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 503                                     {
 504                                         push @fields,
 505                                           $facet_record->field($tag);
 506                                     }
 507                                     for my $field (@fields) {
 508                                         my @subfields = $field->subfields();
 509                                         for my $subfield (@subfields) {
 510                                             my ( $code, $data ) = @$subfield;
 511                                             if ( $code eq
 512                                                 $facets->[$k]->{'subfield'} )
 513                                             {
 514                                                 $facets_counter->{ $facets->[$k]
 515                                                       ->{'link_value'} }
 516                                                   ->{$data}++;
 517                                             }
 518                                         }
 519                                     }
 520                                     $facets_info->{ $facets->[$k]
 521                                           ->{'link_value'} }->{'label_value'} =
 522                                       $facets->[$k]->{'label_value'};
 523                                     $facets_info->{ $facets->[$k]
 524                                           ->{'link_value'} }->{'expanded'} =
 525                                       $facets->[$k]->{'expanded'};
 526                                 }
 527                             }
 528                         }
 529                     }
 530                 }
 531                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 532             }
 533
 534             # warn "connection ", $i-1, ": $size hits";
 535             # warn $results[$i-1]->record(0)->render() if $size > 0;
 536
 537             # BUILD FACETS
 538             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 539                 for my $link_value (
 540                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 541                     keys %$facets_counter )
 542                 {
 543                     my $expandable;
 544                     my $number_of_facets;
 545                     my @this_facets_array;
 546                     for my $one_facet (
 547                         sort {
 548                             $facets_counter->{$link_value}
 549                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 550                         } keys %{ $facets_counter->{$link_value} }
 551                       )
 552                     {
 553                         $number_of_facets++;
 554                         if (   ( $number_of_facets < 6 )
 555                             || ( $expanded_facet eq $link_value )
 556                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 557                         {
 558
 559                       # Sanitize the link value ), ( will cause errors with CCL,
 560                             my $facet_link_value = $one_facet;
 561                             $facet_link_value =~ s/(\(|\))/ /g;
 562
 563                             # fix the length that will display in the label,
 564                             my $facet_label_value = $one_facet;
 565                             $facet_label_value =
 566                               substr( $one_facet, 0, 20 ) . "..."
 567                               unless length($facet_label_value) <= 20;
 568
 569                             # if it's a branch, label by the name, not the code,
 570                             if ( $link_value =~ /branch/ ) {
 571                                 $facet_label_value =
 572                                   $branches->{$one_facet}->{'branchname'};
 573                             }
 574
 575                 # but we're down with the whole label being in the link's title.
 576                             my $facet_title_value = $one_facet;
 577
 578                             push @this_facets_array,
 579                               (
 580                                 {
 581                                     facet_count =>
 582                                       $facets_counter->{$link_value}
 583                                       ->{$one_facet},
 584                                     facet_label_value => $facet_label_value,
 585                                     facet_title_value => $facet_title_value,
 586                                     facet_link_value  => $facet_link_value,
 587                                     type_link_value   => $link_value,
 588                                 },
 589                               );
 590                         }
 591                     }
 592
 593                     # handle expanded option
 594                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 595                         $expandable = 1
 596                           if ( ( $number_of_facets > 6 )
 597                             && ( $expanded_facet ne $link_value ) );
 598                     }
 599                     push @facets_loop,
 600                       (
 601                         {
 602                             type_link_value => $link_value,
 603                             type_id         => $link_value . "_id",
 604                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 605                             facets     => \@this_facets_array,
 606                             expandable => $expandable,
 607                             expand     => $link_value,
 608                         }
 609                       );
 610                 }
 611             }
 612         }
 613     }
 614     return ( undef, $results_hashref, \@facets_loop );
 615 }
 616
 617 sub pazGetRecords {
 618     my (
 619         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 620         $results_per_page, $offset,       $expanded_facet, $branches,
 621         $query_type,       $scan
 622     ) = @_;
 623
 624     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 625     $paz->init();
 626     $paz->search($simple_query);
 627     sleep 1;
 628
 629     # do results
 630     my $results_hashref = {};
 631     my $stats = XMLin($paz->stat);
 632     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 633
 634     # for a grouped search result, the number of hits
 635     # is the number of groups returned; 'bib_hits' will have
 636     # the total number of bibs.
 637     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 638     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 639
 640     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 641         my $recid = $hit->{recid}->[0];
 642
 643         my $work_title = $hit->{'md-work-title'}->[0];
 644         my $work_author;
 645         if (exists $hit->{'md-work-author'}) {
 646             $work_author = $hit->{'md-work-author'}->[0];
 647         }
 648         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 649
 650         my $result_group = {};
 651         $result_group->{'group_label'} = $group_label;
 652         $result_group->{'group_merge_key'} = $recid;
 653
 654         my $count = 1;
 655         if (exists $hit->{count}) {
 656             $count = $hit->{count}->[0];
 657         }
 658         $result_group->{'group_count'} = $count;
 659
 660         for (my $i = 0; $i < $count; $i++) {
 661             # FIXME -- may need to worry about diacritics here
 662             my $rec = $paz->record($recid, $i);
 663             push @{ $result_group->{'RECORDS'} }, $rec;
 664         }
 665
 666         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 667     }
 668
 669     # pass through facets
 670     my $termlist_xml = $paz->termlist('author,subject');
 671     my $terms = XMLin($termlist_xml, forcearray => 1);
 672     my @facets_loop = ();
 673     #die Dumper($results);
 674 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 675 #        my @facets = ();
 676 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 677 #            push @facets, {
 678 #                facet_label_value => $facet->{'name'}->[0],
 679 #            };
 680 #        }
 681 #        push @facets_loop, ( {
 682 #            type_label => $list,
 683 #            facets => \@facets,
 684 #        } );
 685 #    }
 686
 687     return ( undef, $results_hashref, \@facets_loop );
 688 }
 689
 690 # STOPWORDS
 691 sub _remove_stopwords {
 692     my ( $operand, $index ) = @_;
 693     my @stopwords_removed;
 694
 695     # phrase and exact-qualified indexes shouldn't have stopwords removed
 696     if ( $index !~ m/phr|ext/ ) {
 697
 698 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 699 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 700 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 701 #       is a stopword, we'd get "çon" and wouldn't find anything...
 702         foreach ( keys %{ C4::Context->stopwords } ) {
 703             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 704             if ( $operand =~
 705                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
 706             {
 707                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 708                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 709                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 710                 push @stopwords_removed, $_;
 711             }
 712         }
 713     }
 714     return ( $operand, \@stopwords_removed );
 715 }
 716
 717 # TRUNCATION
 718 sub _detect_truncation {
 719     my ( $operand, $index ) = @_;
 720     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 721         @regexpr );
 722     $operand =~ s/^ //g;
 723     my @wordlist = split( /\s/, $operand );
 724     foreach my $word (@wordlist) {
 725         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 726             push @rightlefttruncated, $word;
 727         }
 728         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 729             push @lefttruncated, $word;
 730         }
 731         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 732             push @righttruncated, $word;
 733         }
 734         elsif ( index( $word, "*" ) < 0 ) {
 735             push @nontruncated, $word;
 736         }
 737         else {
 738             push @regexpr, $word;
 739         }
 740     }
 741     return (
 742         \@nontruncated,       \@righttruncated, \@lefttruncated,
 743         \@rightlefttruncated, \@regexpr
 744     );
 745 }
 746
 747 # STEMMING
 748 sub _build_stemmed_operand {
 749     my ($operand) = @_;
 750     my $stemmed_operand;
 751
 752     # If operand contains a digit, it is almost certainly an identifier, and should
 753     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 754     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 755     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 756     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 757     return $operand if $operand =~ /\d/;
 758
 759 # FIXME: the locale should be set based on the user's language and/or search choice
 760     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 761
 762 # FIXME: these should be stored in the db so the librarian can modify the behavior
 763     $stemmer->add_exceptions(
 764         {
 765             'and' => 'and',
 766             'or'  => 'or',
 767             'not' => 'not',
 768         }
 769     );
 770     my @words = split( / /, $operand );
 771     my $stems = $stemmer->stem(@words);
 772     for my $stem (@$stems) {
 773         $stemmed_operand .= "$stem";
 774         $stemmed_operand .= "?"
 775           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 776         $stemmed_operand .= " ";
 777     }
 778     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 779     return $stemmed_operand;
 780 }
 781
 782 # FIELD WEIGHTING
 783 sub _build_weighted_query {
 784
 785 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 786 # pretty well but could work much better if we had a smarter query parser
 787     my ( $operand, $stemmed_operand, $index ) = @_;
 788     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 789     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 790     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 791
 792     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 793
 794     # Keyword, or, no index specified
 795     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 796         $weighted_query .=
 797           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 798         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 799         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 800           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 801           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 802         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 803           if $fuzzy_enabled;    # add fuzzy, word list
 804         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 805           if ( $stemming and $stemmed_operand )
 806           ;                     # add stemming, right truncation
 807         $weighted_query .= " or wrdl,r9=\"$operand\"";
 808
 809         # embedded sorting: 0 a-z; 1 z-a
 810         # $weighted_query .= ") or (sort1,aut=1";
 811     }
 812
 813     # Barcode searches should skip this process
 814     elsif ( $index eq 'bc' ) {
 815         $weighted_query .= "bc=\"$operand\"";
 816     }
 817
 818     # Authority-number searches should skip this process
 819     elsif ( $index eq 'an' ) {
 820         $weighted_query .= "an=\"$operand\"";
 821     }
 822
 823     # If the index already has more than one qualifier, wrap the operand
 824     # in quotes and pass it back (assumption is that the user knows what they
 825     # are doing and won't appreciate us mucking up their query
 826     elsif ( $index =~ ',' ) {
 827         $weighted_query .= " $index=\"$operand\"";
 828     }
 829
 830     #TODO: build better cases based on specific search indexes
 831     else {
 832         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 833           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 834         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 835         $weighted_query .=
 836           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 837     }
 838
 839     $weighted_query .= "))";                       # close rank specification
 840     return $weighted_query;
 841 }
 842
 843 =head2 buildQuery
 844
 845 ( $error, $query,
 846 $simple_query, $query_cgi,
 847 $query_desc, $limit,
 848 $limit_cgi, $limit_desc,
 849 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 850
 851 Build queries and limits in CCL, CGI, Human,
 852 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 853
 854 See verbose embedded documentation.
 855
 856
 857 =cut
 858
 859 sub buildQuery {
 860     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 861
 862     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 863
 864     # dereference
 865     my @operators = @$operators if $operators;
 866     my @indexes   = @$indexes   if $indexes;
 867     my @operands  = @$operands  if $operands;
 868     my @limits    = @$limits    if $limits;
 869     my @sort_by   = @$sort_by   if $sort_by;
 870
 871     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 872     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 873     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 874     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 875     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 876
 877     # no stemming/weight/fuzzy in NoZebra
 878     if ( C4::Context->preference("NoZebra") ) {
 879         $stemming      = 0;
 880         $weight_fields = 0;
 881         $fuzzy_enabled = 0;
 882     }
 883
 884     my $query        = $operands[0];
 885     my $simple_query = $operands[0];
 886
 887     # initialize the variables we're passing back
 888     my $query_cgi;
 889     my $query_desc;
 890     my $query_type;
 891
 892     my $limit;
 893     my $limit_cgi;
 894     my $limit_desc;
 895
 896     my $stopwords_removed;    # flag to determine if stopwords have been removed
 897
 898 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 899 # DIAGNOSTIC ONLY!!
 900     if ( $query =~ /^ccl=/ ) {
 901         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 902     }
 903     if ( $query =~ /^cql=/ ) {
 904         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 905     }
 906     if ( $query =~ /^pqf=/ ) {
 907         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 908     }
 909
 910     # pass nested queries directly
 911     # FIXME: need better handling of some of these variables in this case
 912     if ( $query =~ /(\(|\))/ ) {
 913         return (
 914             undef,              $query, $simple_query, $query_cgi,
 915             $query,             $limit, $limit_cgi,    $limit_desc,
 916             $stopwords_removed, 'ccl'
 917         );
 918     }
 919
 920 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 921 # query operands and indexes and add stemming, truncation, field weighting, etc.
 922 # Once we do so, we'll end up with a value in $query, just like if we had an
 923 # incoming $query from the user
 924     else {
 925         $query = ""
 926           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 927         my $previous_operand
 928           ;    # a flag used to keep track if there was a previous query
 929                # if there was, we can apply the current operator
 930                # for every operand
 931         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 932
 933             # COMBINE OPERANDS, INDEXES AND OPERATORS
 934             if ( $operands[$i] ) {
 935
 936               # A flag to determine whether or not to add the index to the query
 937                 my $indexes_set;
 938
 939 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 940                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 941                     $weight_fields    = 0;
 942                     $stemming         = 0;
 943                     $remove_stopwords = 0;
 944                 }
 945                 my $operand = $operands[$i];
 946                 my $index   = $indexes[$i];
 947
 948                 # Add index-specific attributes
 949                 # Date of Publication
 950                 if ( $index eq 'yr' ) {
 951                     $index .= ",st-numeric";
 952                     $indexes_set++;
 953                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 954                 }
 955
 956                 # Date of Acquisition
 957                 elsif ( $index eq 'acqdate' ) {
 958                     $index .= ",st-date-normalized";
 959                     $indexes_set++;
 960                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 961                 }
 962                 # ISBN,ISSN,Standard Number, don't need special treatment
 963                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 964                     $indexes_set++;
 965                     (
 966                         $stemming,      $auto_truncation,
 967                         $weight_fields, $fuzzy_enabled,
 968                         $remove_stopwords
 969                     ) = ( 0, 0, 0, 0, 0 );
 970
 971                 }
 972                 # Set default structure attribute (word list)
 973                 my $struct_attr;
 974                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 975                     $struct_attr = ",wrdl";
 976                 }
 977
 978                 # Some helpful index variants
 979                 my $index_plus       = $index . $struct_attr . ":" if $index;
 980                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 981
 982                 # Remove Stopwords
 983                 if ($remove_stopwords) {
 984                     ( $operand, $stopwords_removed ) =
 985                       _remove_stopwords( $operand, $index );
 986                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 987                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 988                       if ( $stopwords_removed && $DEBUG );
 989                 }
 990
 991                 # Detect Truncation
 992                 my ( $nontruncated, $righttruncated, $lefttruncated,
 993                     $rightlefttruncated, $regexpr );
 994                 my $truncated_operand;
 995                 (
 996                     $nontruncated, $righttruncated, $lefttruncated,
 997                     $rightlefttruncated, $regexpr
 998                 ) = _detect_truncation( $operand, $index );
 999                 warn
1000 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1001                   if $DEBUG;
1002
1003                 # Apply Truncation
1004                 if (
1005                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1006                     scalar(@$rightlefttruncated) > 0 )
1007                 {
1008
1009                # Don't field weight or add the index to the query, we do it here
1010                     $indexes_set = 1;
1011                     undef $weight_fields;
1012                     my $previous_truncation_operand;
1013                     if ( scalar(@$nontruncated) > 0 ) {
1014                         $truncated_operand .= "$index_plus @$nontruncated ";
1015                         $previous_truncation_operand = 1;
1016                     }
1017                     if ( scalar(@$righttruncated) > 0 ) {
1018                         $truncated_operand .= "and "
1019                           if $previous_truncation_operand;
1020                         $truncated_operand .=
1021                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1022                         $previous_truncation_operand = 1;
1023                     }
1024                     if ( scalar(@$lefttruncated) > 0 ) {
1025                         $truncated_operand .= "and "
1026                           if $previous_truncation_operand;
1027                         $truncated_operand .=
1028                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1029                         $previous_truncation_operand = 1;
1030                     }
1031                     if ( scalar(@$rightlefttruncated) > 0 ) {
1032                         $truncated_operand .= "and "
1033                           if $previous_truncation_operand;
1034                         $truncated_operand .=
1035                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1036                         $previous_truncation_operand = 1;
1037                     }
1038                 }
1039                 $operand = $truncated_operand if $truncated_operand;
1040                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1041
1042                 # Handle Stemming
1043                 my $stemmed_operand;
1044                 $stemmed_operand = _build_stemmed_operand($operand)
1045                   if $stemming;
1046                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1047
1048                 # Handle Field Weighting
1049                 my $weighted_operand;
1050                 $weighted_operand =
1051                   _build_weighted_query( $operand, $stemmed_operand, $index )
1052                   if $weight_fields;
1053                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1054                 $operand = $weighted_operand if $weight_fields;
1055                 $indexes_set = 1 if $weight_fields;
1056
1057                 # If there's a previous operand, we need to add an operator
1058                 if ($previous_operand) {
1059
1060                     # User-specified operator
1061                     if ( $operators[ $i - 1 ] ) {
1062                         $query     .= " $operators[$i-1] ";
1063                         $query     .= " $index_plus " unless $indexes_set;
1064                         $query     .= " $operand";
1065                         $query_cgi .= "&op=$operators[$i-1]";
1066                         $query_cgi .= "&idx=$index" if $index;
1067                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1068                         $query_desc .=
1069                           " $operators[$i-1] $index_plus $operands[$i]";
1070                     }
1071
1072                     # Default operator is and
1073                     else {
1074                         $query      .= " and ";
1075                         $query      .= "$index_plus " unless $indexes_set;
1076                         $query      .= "$operand";
1077                         $query_cgi  .= "&op=and&idx=$index" if $index;
1078                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1079                         $query_desc .= " and $index_plus $operands[$i]";
1080                     }
1081                 }
1082
1083                 # There isn't a pervious operand, don't need an operator
1084                 else {
1085
1086                     # Field-weighted queries already have indexes set
1087                     $query .= " $index_plus " unless $indexes_set;
1088                     $query .= $operand;
1089                     $query_desc .= " $index_plus $operands[$i]";
1090                     $query_cgi  .= "&idx=$index" if $index;
1091                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1092                     $previous_operand = 1;
1093                 }
1094             }    #/if $operands
1095         }    # /for
1096     }
1097     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1098
1099     # add limits
1100     my $group_OR_limits;
1101     my $availability_limit;
1102     foreach my $this_limit (@limits) {
1103         if ( $this_limit =~ /available/ ) {
1104
1105 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1106 # In English:
1107 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1108             $availability_limit .=
1109 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1110             $limit_cgi  .= "&limit=available";
1111             $limit_desc .= "";
1112         }
1113
1114         # group_OR_limits, prefixed by mc-
1115         # OR every member of the group
1116         elsif ( $this_limit =~ /mc/ ) {
1117             $group_OR_limits .= " or " if $group_OR_limits;
1118             $limit_desc      .= " or " if $group_OR_limits;
1119             $group_OR_limits .= "$this_limit";
1120             $limit_cgi       .= "&limit=$this_limit";
1121             $limit_desc      .= " $this_limit";
1122         }
1123
1124         # Regular old limits
1125         else {
1126             $limit .= " and " if $limit || $query;
1127             $limit      .= "$this_limit";
1128             $limit_cgi  .= "&limit=$this_limit";
1129             $limit_desc .= " $this_limit";
1130         }
1131     }
1132     if ($group_OR_limits) {
1133         $limit .= " and " if ( $query || $limit );
1134         $limit .= "($group_OR_limits)";
1135     }
1136     if ($availability_limit) {
1137         $limit .= " and " if ( $query || $limit );
1138         $limit .= "($availability_limit)";
1139     }
1140
1141     # Normalize the query and limit strings
1142     $query =~ s/:/=/g;
1143     $limit =~ s/:/=/g;
1144     for ( $query, $query_desc, $limit, $limit_desc ) {
1145         $_ =~ s/  / /g;    # remove extra spaces
1146         $_ =~ s/^ //g;     # remove any beginning spaces
1147         $_ =~ s/ $//g;     # remove any ending spaces
1148         $_ =~ s/==/=/g;    # remove double == from query
1149     }
1150     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1151
1152     for ($query_cgi,$simple_query) {
1153         $_ =~ s/"//g;
1154     }
1155     # append the limit to the query
1156     $query .= " " . $limit;
1157
1158     # Warnings if DEBUG
1159     if ($DEBUG) {
1160         warn "QUERY:" . $query;
1161         warn "QUERY CGI:" . $query_cgi;
1162         warn "QUERY DESC:" . $query_desc;
1163         warn "LIMIT:" . $limit;
1164         warn "LIMIT CGI:" . $limit_cgi;
1165         warn "LIMIT DESC:" . $limit_desc;
1166         warn "---------\nLeave buildQuery\n---------";
1167     }
1168     return (
1169         undef,              $query, $simple_query, $query_cgi,
1170         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1171         $stopwords_removed, $query_type
1172     );
1173 }
1174
1175 =head2 searchResults
1176
1177 Format results in a form suitable for passing to the template
1178
1179 =cut
1180
1181 # IMO this subroutine is pretty messy still -- it's responsible for
1182 # building the HTML output for the template
1183 sub searchResults {
1184     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1185     my $dbh = C4::Context->dbh;
1186     my $even = 1;
1187     my @newresults;
1188
1189     # add search-term highlighting via <span>s on the search terms
1190     my $span_terms_hashref;
1191     for my $span_term ( split( / /, $searchdesc ) ) {
1192         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1193         $span_terms_hashref->{$span_term}++;
1194     }
1195
1196     #Build branchnames hash
1197     #find branchname
1198     #get branch information.....
1199     my %branches;
1200     my $bsth =
1201       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1202       ;    # FIXME : use C4::Koha::GetBranches
1203     $bsth->execute();
1204     while ( my $bdata = $bsth->fetchrow_hashref ) {
1205         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1206     }
1207 # FIXME - We build an authorised values hash here, using the default framework
1208 # though it is possible to have different authvals for different fws.
1209
1210     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1211
1212     # get notforloan authorised value list (see $shelflocations  FIXME)
1213     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1214
1215     #Build itemtype hash
1216     #find itemtype & itemtype image
1217     my %itemtypes;
1218     $bsth =
1219       $dbh->prepare(
1220         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1221       );
1222     $bsth->execute();
1223     while ( my $bdata = $bsth->fetchrow_hashref ) {
1224                 foreach (qw(description imageurl summary notforloan)) {
1225                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1226                 }
1227     }
1228
1229     #search item field code
1230     my $sth =
1231       $dbh->prepare(
1232 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1233       );
1234     $sth->execute;
1235     my ($itemtag) = $sth->fetchrow;
1236
1237     ## find column names of items related to MARC
1238     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1239     $sth2->execute;
1240     my %subfieldstosearch;
1241     while ( ( my $column ) = $sth2->fetchrow ) {
1242         my ( $tagfield, $tagsubfield ) =
1243           &GetMarcFromKohaField( "items." . $column, "" );
1244         $subfieldstosearch{$column} = $tagsubfield;
1245     }
1246
1247     # handle which records to actually retrieve
1248     my $times;
1249     if ( $hits && $offset + $results_per_page <= $hits ) {
1250         $times = $offset + $results_per_page;
1251     }
1252     else {
1253         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1254     }
1255
1256     # loop through all of the records we've retrieved
1257     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1258         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1259         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1260         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1261         $oldbiblio->{result_number} = $i + 1;
1262
1263         # add imageurl to itemtype if there is one
1264         if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1265             $oldbiblio->{imageurl} =
1266               $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1267         } else {
1268             $oldbiblio->{imageurl} =
1269               getitemtypeimagesrc() . "/"
1270               . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1271               if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1272         }
1273                 my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) );
1274                 $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images;
1275         my $aisbn = $oldbiblio->{'isbn'};
1276         $aisbn =~ /(\d*[X]*)/;
1277         $oldbiblio->{amazonisbn} = $1;
1278                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1279  # Build summary if there is one (the summary is defined in the itemtypes table)
1280  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1281         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1282             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1283             my @fields  = $marcrecord->fields();
1284             foreach my $field (@fields) {
1285                 my $tag      = $field->tag();
1286                 my $tagvalue = $field->as_string();
1287                 $summary =~
1288                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1289                 unless ( $tag < 10 ) {
1290                     my @subf = $field->subfields;
1291                     for my $i ( 0 .. $#subf ) {
1292                         my $subfieldcode  = $subf[$i][0];
1293                         my $subfieldvalue = $subf[$i][1];
1294                         my $tagsubf       = $tag . $subfieldcode;
1295                         $summary =~
1296 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1297                     }
1298                 }
1299             }
1300             # FIXME: yuk
1301             $summary =~ s/\[(.*?)]//g;
1302             $summary =~ s/\n/<br\/>/g;
1303             $oldbiblio->{summary} = $summary;
1304         }
1305
1306         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1307         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1308         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1309         # Add search-term highlighting to the whole record where they match using <span>s
1310         if (C4::Context->preference("OpacHighlightedWords")){
1311             my $searchhighlightblob;
1312             for my $highlight_field ( $marcrecord->fields ) {
1313
1314     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1315                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1316                 for my $subfield ($highlight_field->subfields()) {
1317                     my $match;
1318                     next if $subfield->[0] eq '9';
1319                     my $field = $subfield->[1];
1320                     for my $term ( keys %$span_terms_hashref ) {
1321                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1322                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1323                         $match++;
1324                         }
1325                     }
1326                     $searchhighlightblob .= $field . " ... " if $match;
1327                 }
1328
1329             }
1330             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1331             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1332         }
1333
1334         # Add search-term highlighting to the title, subtitle, etc. fields
1335         for my $term ( keys %$span_terms_hashref ) {
1336             my $old_term = $term;
1337             if ( length($term) > 3 ) {
1338                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1339                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1340                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1341                                 }
1342             }
1343         }
1344
1345         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1346
1347         # Pull out the items fields
1348         my @fields = $marcrecord->field($itemtag);
1349
1350         # Setting item statuses for display
1351         my @available_items_loop;
1352         my @onloan_items_loop;
1353         my @other_items_loop;
1354
1355         my $available_items;
1356         my $onloan_items;
1357         my $other_items;
1358
1359         my $ordered_count         = 0;
1360         my $available_count       = 0;
1361         my $onloan_count          = 0;
1362         my $longoverdue_count     = 0;
1363         my $other_count           = 0;
1364         my $wthdrawn_count        = 0;
1365         my $itemlost_count        = 0;
1366         my $itembinding_count     = 0;
1367         my $itemdamaged_count     = 0;
1368         my $item_in_transit_count = 0;
1369         my $can_place_holds       = 0;
1370         my $items_count           = scalar(@fields);
1371         my $items_counter;
1372         my $maxitems =
1373           ( C4::Context->preference('maxItemsinSearchResults') )
1374           ? C4::Context->preference('maxItemsinSearchResults') - 1
1375           : 1;
1376
1377         # loop through every item
1378         foreach my $field (@fields) {
1379             my $item;
1380             $items_counter++;
1381
1382             # populate the items hash
1383             foreach my $code ( keys %subfieldstosearch ) {
1384                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1385             }
1386                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1387                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1388             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1389             if ($item->{$hbranch}) {
1390                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1391             }
1392             elsif ($item->{$otherbranch}) {     # Last resort
1393                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1394             }
1395
1396                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1397 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1398             if ( $item->{onloan} ) {
1399                 $onloan_count++;
1400                                 my $key = $prefix . $item->{due_date};
1401                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1402                                 $onloan_items->{$key}->{count}++ if $item->{homebranch};
1403                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1404                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1405                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1406                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1407                 # if something's checked out and lost, mark it as 'long overdue'
1408                 if ( $item->{itemlost} ) {
1409                     $onloan_items->{$prefix}->{longoverdue}++;
1410                     $longoverdue_count++;
1411                 } else {        # can place holds as long as item isn't lost
1412                     $can_place_holds = 1;
1413                 }
1414             }
1415
1416          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1417             else {
1418
1419                 # item is on order
1420                 if ( $item->{notforloan} == -1 ) {
1421                     $ordered_count++;
1422                 }
1423
1424                 # is item in transit?
1425                 my $transfertwhen = '';
1426                 my ($transfertfrom, $transfertto);
1427
1428                 unless ($item->{wthdrawn}
1429                         || $item->{itemlost}
1430                         || $item->{damaged}
1431                         || $item->{notforloan}
1432                         || $items_count > 20) {
1433
1434                     # A couple heuristics to limit how many times
1435                     # we query the database for item transfer information, sacrificing
1436                     # accuracy in some cases for speed;
1437                     #
1438                     # 1. don't query if item has one of the other statuses
1439                     # 2. don't check transit status if the bib has
1440                     #    more than 20 items
1441                     #
1442                     # FIXME: to avoid having the query the database like this, and to make
1443                     #        the in transit status count as unavailable for search limiting,
1444                     #        should map transit status to record indexed in Zebra.
1445                     #
1446                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1447                 }
1448
1449                 # item is withdrawn, lost or damaged
1450                 if (   $item->{wthdrawn}
1451                     || $item->{itemlost}
1452                     || $item->{damaged}
1453                     || $item->{notforloan}
1454                     || ($transfertwhen ne ''))
1455                 {
1456                     $wthdrawn_count++        if $item->{wthdrawn};
1457                     $itemlost_count++        if $item->{itemlost};
1458                     $itemdamaged_count++     if $item->{damaged};
1459                     $item_in_transit_count++ if $transfertwhen ne '';
1460                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1461                     $other_count++;
1462
1463                                         my $key = $prefix . $item->{status};
1464                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1465                         $other_items->{$key}->{$_} = $item->{$_};
1466                                         }
1467                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1468                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1469                                         $other_items->{$key}->{count}++ if $item->{homebranch};
1470                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1471                                         $other_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1472                 }
1473                 # item is available
1474                 else {
1475                     $can_place_holds = 1;
1476                     $available_count++;
1477                                         $available_items->{$prefix}->{count}++ if $item->{homebranch};
1478                                         foreach (qw(branchname itemcallnumber)) {
1479                         $available_items->{$prefix}->{$_} = $item->{$_};
1480                                         }
1481                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1482                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1483                 }
1484             }
1485         }    # notforloan, item level and biblioitem level
1486         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1487         $maxitems =
1488           ( C4::Context->preference('maxItemsinSearchResults') )
1489           ? C4::Context->preference('maxItemsinSearchResults') - 1
1490           : 1;
1491         for my $key ( sort keys %$onloan_items ) {
1492             (++$onloanitemscount > $maxitems) and last;
1493             push @onloan_items_loop, $onloan_items->{$key};
1494         }
1495         for my $key ( sort keys %$other_items ) {
1496             (++$otheritemscount > $maxitems) and last;
1497             push @other_items_loop, $other_items->{$key};
1498         }
1499         for my $key ( sort keys %$available_items ) {
1500             (++$availableitemscount > $maxitems) and last;
1501             push @available_items_loop, $available_items->{$key}
1502         }
1503
1504         # XSLT processing of some stuff
1505         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1506             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1507             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1508         }
1509
1510         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1511         $can_place_holds = 0
1512           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1513         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1514         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1515         $oldbiblio->{items_count}          = $items_count;
1516         $oldbiblio->{available_items_loop} = \@available_items_loop;
1517         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1518         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1519         $oldbiblio->{availablecount}       = $available_count;
1520         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1521         $oldbiblio->{onloancount}          = $onloan_count;
1522         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1523         $oldbiblio->{othercount}           = $other_count;
1524         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1525         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1526         $oldbiblio->{itemlostcount}        = $itemlost_count;
1527         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1528         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1529         $oldbiblio->{orderedcount}         = $ordered_count;
1530         $oldbiblio->{isbn} =~
1531           s/-//g;    # deleting - in isbn to enable amazon content
1532         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) );
1533         push( @newresults, $oldbiblio );
1534     }
1535     return @newresults;
1536 }
1537
1538 #----------------------------------------------------------------------
1539 #
1540 # Non-Zebra GetRecords#
1541 #----------------------------------------------------------------------
1542
1543 =head2 NZgetRecords
1544
1545   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1546
1547 =cut
1548
1549 sub NZgetRecords {
1550     my (
1551         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1552         $results_per_page, $offset,       $expanded_facet, $branches,
1553         $query_type,       $scan
1554     ) = @_;
1555     warn "query =$query" if $DEBUG;
1556     my $result = NZanalyse($query);
1557     warn "results =$result" if $DEBUG;
1558     return ( undef,
1559         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1560         undef );
1561 }
1562
1563 =head2 NZanalyse
1564
1565   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1566   the list is built from an inverted index in the nozebra SQL table
1567   note that title is here only for convenience : the sorting will be very fast when requested on title
1568   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1569
1570 =cut
1571
1572 sub NZanalyse {
1573     my ( $string, $server ) = @_;
1574 #     warn "---------"       if $DEBUG;
1575     warn " NZanalyse" if $DEBUG;
1576 #     warn "---------"       if $DEBUG;
1577
1578  # $server contains biblioserver or authorities, depending on what we search on.
1579  #warn "querying : $string on $server";
1580     $server = 'biblioserver' unless $server;
1581
1582 # if we have a ", replace the content to discard temporarily any and/or/not inside
1583     my $commacontent;
1584     if ( $string =~ /"/ ) {
1585         $string =~ s/"(.*?)"/__X__/;
1586         $commacontent = $1;
1587         warn "commacontent : $commacontent" if $DEBUG;
1588     }
1589
1590 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1591 # then, call again NZanalyse with $left and $right
1592 # (recursive until we find a leaf (=> something without and/or/not)
1593 # delete repeated operator... Would then go in infinite loop
1594     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1595     }
1596
1597     #process parenthesis before.
1598     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1599         my $left     = $1;
1600         my $right    = $4;
1601         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1602         warn
1603 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1604           if $DEBUG;
1605         my $leftresult = NZanalyse( $left, $server );
1606         if ($operator) {
1607             my $rightresult = NZanalyse( $right, $server );
1608
1609             # OK, we have the results for right and left part of the query
1610             # depending of operand, intersect, union or exclude both lists
1611             # to get a result list
1612             if ( $operator eq ' and ' ) {
1613                 return NZoperatorAND($leftresult,$rightresult);
1614             }
1615             elsif ( $operator eq ' or ' ) {
1616
1617                 # just merge the 2 strings
1618                 return $leftresult . $rightresult;
1619             }
1620             elsif ( $operator eq ' not ' ) {
1621                 return NZoperatorNOT($leftresult,$rightresult);
1622             }
1623         }
1624         else {
1625 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1626             return $leftresult;
1627         }
1628     }
1629     warn "string :" . $string if $DEBUG;
1630     my $left = "";
1631     my $right = "";
1632     my $operator = "";
1633     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1634         $left     = $1;
1635         $right    = $3;
1636         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1637     }
1638     warn "no parenthesis. left : $left operator: $operator right: $right"
1639       if $DEBUG;
1640
1641     # it's not a leaf, we have a and/or/not
1642     if ($operator) {
1643
1644         # reintroduce comma content if needed
1645         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1646         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1647         warn "node : $left / $operator / $right\n" if $DEBUG;
1648         my $leftresult  = NZanalyse( $left,  $server );
1649         my $rightresult = NZanalyse( $right, $server );
1650         warn " leftresult : $leftresult" if $DEBUG;
1651         warn " rightresult : $rightresult" if $DEBUG;
1652         # OK, we have the results for right and left part of the query
1653         # depending of operand, intersect, union or exclude both lists
1654         # to get a result list
1655         if ( $operator eq ' and ' ) {
1656             warn "NZAND";
1657             return NZoperatorAND($leftresult,$rightresult);
1658         }
1659         elsif ( $operator eq ' or ' ) {
1660
1661             # just merge the 2 strings
1662             return $leftresult . $rightresult;
1663         }
1664         elsif ( $operator eq ' not ' ) {
1665             return NZoperatorNOT($leftresult,$rightresult);
1666         }
1667         else {
1668
1669 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1670             die "error : operand unknown : $operator for $string";
1671         }
1672
1673         # it's a leaf, do the real SQL query and return the result
1674     }
1675     else {
1676         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1677         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1678         #remove trailing blank at the beginning
1679         $string =~ s/^ //g;
1680         warn "leaf:$string" if $DEBUG;
1681
1682         # parse the string in in operator/operand/value again
1683         my $left = "";
1684         my $operator = "";
1685         my $right = "";
1686         if ($string =~ /(.*)(>=|<=)(.*)/) {
1687             $left     = $1;
1688             $operator = $2;
1689             $right    = $3;
1690         } else {
1691             $left = $string;
1692         }
1693 #         warn "handling leaf... left:$left operator:$operator right:$right"
1694 #           if $DEBUG;
1695         unless ($operator) {
1696             if ($string =~ /(.*)(>|<|=)(.*)/) {
1697                 $left     = $1;
1698                 $operator = $2;
1699                 $right    = $3;
1700                 warn
1701     "handling unless (operator)... left:$left operator:$operator right:$right"
1702                 if $DEBUG;
1703             } else {
1704                 $left = $string;
1705             }
1706         }
1707         my $results;
1708
1709 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1710         $left =~ s/ .*$//;
1711
1712         # automatic replace for short operators
1713         $left = 'title'            if $left =~ '^ti$';
1714         $left = 'author'           if $left =~ '^au$';
1715         $left = 'publisher'        if $left =~ '^pb$';
1716         $left = 'subject'          if $left =~ '^su$';
1717         $left = 'koha-Auth-Number' if $left =~ '^an$';
1718         $left = 'keyword'          if $left =~ '^kw$';
1719         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1720         if ( $operator && $left ne 'keyword' ) {
1721
1722             #do a specific search
1723             my $dbh = C4::Context->dbh;
1724             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1725             my $sth =
1726               $dbh->prepare(
1727 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1728               );
1729             warn "$left / $operator / $right\n" if $DEBUG;
1730
1731             # split each word, query the DB and build the biblionumbers result
1732             #sanitizing leftpart
1733             $left =~ s/^\s+|\s+$//;
1734             foreach ( split / /, $right ) {
1735                 my $biblionumbers;
1736                 $_ =~ s/^\s+|\s+$//;
1737                 next unless $_;
1738                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1739                 $sth->execute( $server, $left, $_ )
1740                   or warn "execute failed: $!";
1741                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1742
1743 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1744 # otherwise, fill the result
1745                     $biblionumbers .= $line
1746                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1747                     warn "result : $value "
1748                       . ( $right  =~ /\d/ ) . "=="
1749                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1750                 }
1751
1752 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1753                 if ($results) {
1754                     warn "NZAND" if $DEBUG;
1755                     $results = NZoperatorAND($biblionumbers,$results);
1756                 }
1757                 else {
1758                     $results = $biblionumbers;
1759                 }
1760             }
1761         }
1762         else {
1763
1764       #do a complete search (all indexes), if index='kw' do complete search too.
1765             my $dbh = C4::Context->dbh;
1766             my $sth =
1767               $dbh->prepare(
1768 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1769               );
1770
1771             # split each word, query the DB and build the biblionumbers result
1772             foreach ( split / /, $string ) {
1773                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1774                 warn "search on all indexes on $_" if $DEBUG;
1775                 my $biblionumbers;
1776                 next unless $_;
1777                 $sth->execute( $server, $_ );
1778                 while ( my $line = $sth->fetchrow ) {
1779                     $biblionumbers .= $line;
1780                 }
1781
1782 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1783                 if ($results) {
1784                     $results = NZoperatorAND($biblionumbers,$results);
1785                 }
1786                 else {
1787                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1788                     $results = $biblionumbers;
1789                 }
1790             }
1791         }
1792         warn "return : $results for LEAF : $string" if $DEBUG;
1793         return $results;
1794     }
1795     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1796 }
1797
1798 sub NZoperatorAND{
1799     my ($rightresult, $leftresult)=@_;
1800
1801     my @leftresult = split /;/, $leftresult;
1802     warn " @leftresult / $rightresult \n" if $DEBUG;
1803
1804     #             my @rightresult = split /;/,$leftresult;
1805     my $finalresult;
1806
1807 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1808 # the result is stored twice, to have the same weight for AND than OR.
1809 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1810 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1811     foreach (@leftresult) {
1812         my $value = $_;
1813         my $countvalue;
1814         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1815         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1816             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1817             $finalresult .=
1818                 "$value-$countvalue;$value-$countvalue;";
1819         }
1820     }
1821     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1822     return $finalresult;
1823 }
1824
1825 sub NZoperatorOR{
1826     my ($rightresult, $leftresult)=@_;
1827     return $rightresult.$leftresult;
1828 }
1829
1830 sub NZoperatorNOT{
1831     my ($leftresult, $rightresult)=@_;
1832
1833     my @leftresult = split /;/, $leftresult;
1834
1835     #             my @rightresult = split /;/,$leftresult;
1836     my $finalresult;
1837     foreach (@leftresult) {
1838         my $value=$_;
1839         $value=$1 if $value=~m/(.*)-\d+$/;
1840         unless ($rightresult =~ "$value-") {
1841             $finalresult .= "$_;";
1842         }
1843     }
1844     return $finalresult;
1845 }
1846
1847 =head2 NZorder
1848
1849   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1850
1851   TODO :: Description
1852
1853 =cut
1854
1855 sub NZorder {
1856     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1857     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1858
1859     # order title asc by default
1860     #     $ordering = '1=36 <i' unless $ordering;
1861     $results_per_page = 20 unless $results_per_page;
1862     $offset           = 0  unless $offset;
1863     my $dbh = C4::Context->dbh;
1864
1865     #
1866     # order by POPULARITY
1867     #
1868     if ( $ordering =~ /popularity/ ) {
1869         my %result;
1870         my %popularity;
1871
1872         # popularity is not in MARC record, it's builded from a specific query
1873         my $sth =
1874           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1875         foreach ( split /;/, $biblionumbers ) {
1876             my ( $biblionumber, $title ) = split /,/, $_;
1877             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1878             $sth->execute($biblionumber);
1879             my $popularity = $sth->fetchrow || 0;
1880
1881 # hint : the key is popularity.title because we can have
1882 # many results with the same popularity. In this cas, sub-ordering is done by title
1883 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1884 # (un-frequent, I agree, but we won't forget anything that way ;-)
1885             $popularity{ sprintf( "%10d", $popularity ) . $title
1886                   . $biblionumber } = $biblionumber;
1887         }
1888
1889     # sort the hash and return the same structure as GetRecords (Zebra querying)
1890         my $result_hash;
1891         my $numbers = 0;
1892         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1893             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1894                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1895                   $result{ $popularity{$key} }->as_usmarc();
1896             }
1897         }
1898         else {                                    # sort popularity ASC
1899             foreach my $key ( sort ( keys %popularity ) ) {
1900                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1901                   $result{ $popularity{$key} }->as_usmarc();
1902             }
1903         }
1904         my $finalresult = ();
1905         $result_hash->{'hits'}         = $numbers;
1906         $finalresult->{'biblioserver'} = $result_hash;
1907         return $finalresult;
1908
1909         #
1910         # ORDER BY author
1911         #
1912     }
1913     elsif ( $ordering =~ /author/ ) {
1914         my %result;
1915         foreach ( split /;/, $biblionumbers ) {
1916             my ( $biblionumber, $title ) = split /,/, $_;
1917             my $record = GetMarcBiblio($biblionumber);
1918             my $author;
1919             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1920                 $author = $record->subfield( '200', 'f' );
1921                 $author = $record->subfield( '700', 'a' ) unless $author;
1922             }
1923             else {
1924                 $author = $record->subfield( '100', 'a' );
1925             }
1926
1927 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1928 # and we don't want to get only 1 result for each of them !!!
1929             $result{ $author . $biblionumber } = $record;
1930         }
1931
1932     # sort the hash and return the same structure as GetRecords (Zebra querying)
1933         my $result_hash;
1934         my $numbers = 0;
1935         if ( $ordering eq 'author_za' ) {    # sort by author desc
1936             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1937                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1938                   $result{$key}->as_usmarc();
1939             }
1940         }
1941         else {                               # sort by author ASC
1942             foreach my $key ( sort ( keys %result ) ) {
1943                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1944                   $result{$key}->as_usmarc();
1945             }
1946         }
1947         my $finalresult = ();
1948         $result_hash->{'hits'}         = $numbers;
1949         $finalresult->{'biblioserver'} = $result_hash;
1950         return $finalresult;
1951
1952         #
1953         # ORDER BY callnumber
1954         #
1955     }
1956     elsif ( $ordering =~ /callnumber/ ) {
1957         my %result;
1958         foreach ( split /;/, $biblionumbers ) {
1959             my ( $biblionumber, $title ) = split /,/, $_;
1960             my $record = GetMarcBiblio($biblionumber);
1961             my $callnumber;
1962             my ( $callnumber_tag, $callnumber_subfield ) =
1963               GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1964             ( $callnumber_tag, $callnumber_subfield ) =
1965               GetMarcFromKohaField('biblioitems.callnumber')
1966               unless $callnumber_tag;
1967             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1968                 $callnumber = $record->subfield( '200', 'f' );
1969             }
1970             else {
1971                 $callnumber = $record->subfield( '100', 'a' );
1972             }
1973
1974 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1975 # and we don't want to get only 1 result for each of them !!!
1976             $result{ $callnumber . $biblionumber } = $record;
1977         }
1978
1979     # sort the hash and return the same structure as GetRecords (Zebra querying)
1980         my $result_hash;
1981         my $numbers = 0;
1982         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1983             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1984                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1985                   $result{$key}->as_usmarc();
1986             }
1987         }
1988         else {                                     # sort by title ASC
1989             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1990                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1991                   $result{$key}->as_usmarc();
1992             }
1993         }
1994         my $finalresult = ();
1995         $result_hash->{'hits'}         = $numbers;
1996         $finalresult->{'biblioserver'} = $result_hash;
1997         return $finalresult;
1998     }
1999     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2000         my %result;
2001         foreach ( split /;/, $biblionumbers ) {
2002             my ( $biblionumber, $title ) = split /,/, $_;
2003             my $record = GetMarcBiblio($biblionumber);
2004             my ( $publicationyear_tag, $publicationyear_subfield ) =
2005               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2006             my $publicationyear =
2007               $record->subfield( $publicationyear_tag,
2008                 $publicationyear_subfield );
2009
2010 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2011 # and we don't want to get only 1 result for each of them !!!
2012             $result{ $publicationyear . $biblionumber } = $record;
2013         }
2014
2015     # sort the hash and return the same structure as GetRecords (Zebra querying)
2016         my $result_hash;
2017         my $numbers = 0;
2018         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2019             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2020                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2021                   $result{$key}->as_usmarc();
2022             }
2023         }
2024         else {                                 # sort by pub year ASC
2025             foreach my $key ( sort ( keys %result ) ) {
2026                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2027                   $result{$key}->as_usmarc();
2028             }
2029         }
2030         my $finalresult = ();
2031         $result_hash->{'hits'}         = $numbers;
2032         $finalresult->{'biblioserver'} = $result_hash;
2033         return $finalresult;
2034
2035         #
2036         # ORDER BY title
2037         #
2038     }
2039     elsif ( $ordering =~ /title/ ) {
2040
2041 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2042         my %result;
2043         foreach ( split /;/, $biblionumbers ) {
2044             my ( $biblionumber, $title ) = split /,/, $_;
2045
2046 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2047 # and we don't want to get only 1 result for each of them !!!
2048 # hint & speed improvement : we can order without reading the record
2049 # so order, and read records only for the requested page !
2050             $result{ $title . $biblionumber } = $biblionumber;
2051         }
2052
2053     # sort the hash and return the same structure as GetRecords (Zebra querying)
2054         my $result_hash;
2055         my $numbers = 0;
2056         if ( $ordering eq 'title_az' ) {    # sort by title desc
2057             foreach my $key ( sort ( keys %result ) ) {
2058                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2059             }
2060         }
2061         else {                              # sort by title ASC
2062             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2063                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2064             }
2065         }
2066
2067         # limit the $results_per_page to result size if it's more
2068         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2069
2070         # for the requested page, replace biblionumber by the complete record
2071         # speed improvement : avoid reading too much things
2072         for (
2073             my $counter = $offset ;
2074             $counter <= $offset + $results_per_page ;
2075             $counter++
2076           )
2077         {
2078             $result_hash->{'RECORDS'}[$counter] =
2079               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2080         }
2081         my $finalresult = ();
2082         $result_hash->{'hits'}         = $numbers;
2083         $finalresult->{'biblioserver'} = $result_hash;
2084         return $finalresult;
2085     }
2086     else {
2087
2088 #
2089 # order by ranking
2090 #
2091 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2092         my %result;
2093         my %count_ranking;
2094         foreach ( split /;/, $biblionumbers ) {
2095             my ( $biblionumber, $title ) = split /,/, $_;
2096             $title =~ /(.*)-(\d)/;
2097
2098             # get weight
2099             my $ranking = $2;
2100
2101 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2102 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2103 # biblio N has ranking = 6
2104             $count_ranking{$biblionumber} += $ranking;
2105         }
2106
2107 # build the result by "inverting" the count_ranking hash
2108 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2109 #         warn "counting";
2110         foreach ( keys %count_ranking ) {
2111             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2112         }
2113
2114     # sort the hash and return the same structure as GetRecords (Zebra querying)
2115         my $result_hash;
2116         my $numbers = 0;
2117         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2118             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2119         }
2120
2121         # limit the $results_per_page to result size if it's more
2122         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2123
2124         # for the requested page, replace biblionumber by the complete record
2125         # speed improvement : avoid reading too much things
2126         for (
2127             my $counter = $offset ;
2128             $counter <= $offset + $results_per_page ;
2129             $counter++
2130           )
2131         {
2132             $result_hash->{'RECORDS'}[$counter] =
2133               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2134               if $result_hash->{'RECORDS'}[$counter];
2135         }
2136         my $finalresult = ();
2137         $result_hash->{'hits'}         = $numbers;
2138         $finalresult->{'biblioserver'} = $result_hash;
2139         return $finalresult;
2140     }
2141 }
2142
2143 =head2 ModBiblios
2144
2145 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2146
2147 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2148 test parameter if set donot perform change to records in database.
2149
2150 =over 2
2151
2152 =item C<input arg:>
2153
2154     * $listbiblios is an array ref to marcrecords to be changed
2155     * $tagsubfield is the reference of the subfield to change.
2156     * $initvalue is the value to search the record for
2157     * $targetvalue is the value to set the subfield to
2158     * $test is to be set only not to perform changes in database.
2159
2160 =item C<Output arg:>
2161     * $countchanged counts all the changes performed.
2162     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2163
2164 =item C<usage in the script:>
2165
2166 =back
2167
2168 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2169 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2170 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2171
2172 =cut
2173
2174 sub ModBiblios {
2175     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2176     my $countmatched;
2177     my @unmatched;
2178     my ( $tag, $subfield ) = ( $1, $2 )
2179       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2180     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2181         $tag = $tag . $subfield;
2182         undef $subfield;
2183     }
2184     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
2185     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2186     if ($tag eq $itemtag) {
2187         # do not allow the embedded item tag to be
2188         # edited from here
2189         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2190         return (0, []);
2191     }
2192     foreach my $usmarc (@$listbiblios) {
2193         my $record;
2194         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2195         my $biblionumber;
2196         if ($@) {
2197
2198             # usmarc is not a valid usmarc May be a biblionumber
2199             # FIXME - sorry, please let's figure out whether
2200             #         this function is to be passed a list of
2201             #         record numbers or a list of MARC::Record
2202             #         objects.  The former is probably better
2203             #         because the MARC records supplied by Zebra
2204             #         may be not current.
2205             $record       = GetMarcBiblio($usmarc);
2206             $biblionumber = $usmarc;
2207         }
2208         else {
2209             if ( $bntag >= 010 ) {
2210                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2211             }
2212             else {
2213                 $biblionumber = $record->field($bntag)->data;
2214             }
2215         }
2216
2217         #GetBiblionumber is to be written.
2218         #Could be replaced by TransformMarcToKoha (But Would be longer)
2219         if ( $record->field($tag) ) {
2220             my $modify = 0;
2221             foreach my $field ( $record->field($tag) ) {
2222                 if ($subfield) {
2223                     if (
2224                         $field->delete_subfield(
2225                             'code'  => $subfield,
2226                             'match' => qr($initvalue)
2227                         )
2228                       )
2229                     {
2230                         $countmatched++;
2231                         $modify = 1;
2232                         $field->update( $subfield, $targetvalue )
2233                           if ($targetvalue);
2234                     }
2235                 }
2236                 else {
2237                     if ( $tag >= 010 ) {
2238                         if ( $field->delete_field($field) ) {
2239                             $countmatched++;
2240                             $modify = 1;
2241                         }
2242                     }
2243                     else {
2244                         $field->data = $targetvalue
2245                           if ( $field->data =~ qr($initvalue) );
2246                     }
2247                 }
2248             }
2249
2250             #       warn $record->as_formatted;
2251             if ($modify) {
2252                 ModBiblio( $record, $biblionumber,
2253                     GetFrameworkCode($biblionumber) )
2254                   unless ($test);
2255             }
2256             else {
2257                 push @unmatched, $biblionumber;
2258             }
2259         }
2260         else {
2261             push @unmatched, $biblionumber;
2262         }
2263     }
2264     return ( $countmatched, \@unmatched );
2265 }
2266
2267 END { }    # module clean-up code here (global destructor)
2268
2269 1;
2270 __END__
2271
2272 =head1 AUTHOR
2273
2274 Koha Developement team <info@koha.org>
2275
2276 =cut