C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Dates qw(format_date);
  25
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  27
  28 # set the version for version checking
  29 BEGIN {
  30     $VERSION = 3.01;
  31     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  32 }
  33
  34 =head1 NAME
  35
  36 C4::Search - Functions for searching the Koha catalog.
  37
  38 =head1 SYNOPSIS
  39
  40 See opac/opac-search.pl or catalogue/search.pl for example of usage
  41
  42 =head1 DESCRIPTION
  43
  44 This module provides searching functions for Koha's bibliographic databases
  45
  46 =head1 FUNCTIONS
  47
  48 =cut
  49
  50 @ISA    = qw(Exporter);
  51 @EXPORT = qw(
  52   &findseealso
  53   &FindDuplicate
  54   &SimpleSearch
  55   &searchResults
  56   &getRecords
  57   &buildQuery
  58   &NZgetRecords
  59   &ModBiblios
  60 );
  61
  62 # make all your functions, whether exported or not;
  63
  64 =head2 findseealso($dbh,$fields);
  65
  66 C<$dbh> is a link to the DB handler.
  67
  68 use C4::Context;
  69 my $dbh =C4::Context->dbh;
  70
  71 C<$fields> is a reference to the fields array
  72
  73 This function modifies the @$fields array and adds related fields to search on.
  74
  75 FIXME: this function is probably deprecated in Koha 3
  76
  77 =cut
  78
  79 sub findseealso {
  80     my ( $dbh, $fields ) = @_;
  81     my $tagslib = GetMarcStructure(1);
  82     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  83         my ($tag)      = substr( @$fields[$i], 1, 3 );
  84         my ($subfield) = substr( @$fields[$i], 4, 1 );
  85         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  86           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  87     }
  88 }
  89
  90 =head2 FindDuplicate
  91
  92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  93
  94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  95
  96 =cut
  97
  98 sub FindDuplicate {
  99     my ($record) = @_;
 100     my $dbh = C4::Context->dbh;
 101     my $result = TransformMarcToKoha( $dbh, $record, '' );
 102     my $sth;
 103     my $query;
 104     my $search;
 105     my $type;
 106     my ( $biblionumber, $title );
 107
 108     # search duplicate on ISBN, easy and fast..
 109     # ... normalize first
 110     if ( $result->{isbn} ) {
 111         $result->{isbn} =~ s/\(.*$//;
 112         $result->{isbn} =~ s/\s+$//;
 113         $query = "isbn=$result->{isbn}";
 114     }
 115     else {
 116         $result->{title} =~ s /\\//g;
 117         $result->{title} =~ s /\"//g;
 118         $result->{title} =~ s /\(//g;
 119         $result->{title} =~ s /\)//g;
 120
 121         # FIXME: instead of removing operators, could just do
 122         # quotes around the value
 123         $result->{title} =~ s/(and|or|not)//g;
 124         $query = "ti,ext=$result->{title}";
 125         $query .= " and itemtype=$result->{itemtype}"
 126           if ( $result->{itemtype} );
 127         if   ( $result->{author} ) {
 128             $result->{author} =~ s /\\//g;
 129             $result->{author} =~ s /\"//g;
 130             $result->{author} =~ s /\(//g;
 131             $result->{author} =~ s /\)//g;
 132
 133             # remove valid operators
 134             $result->{author} =~ s/(and|or|not)//g;
 135             $query .= " and au,ext=$result->{author}";
 136         }
 137     }
 138
 139     # FIXME: add error handling
 140     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 141     my @results;
 142     foreach my $possible_duplicate_record (@$searchresults) {
 143         my $marcrecord =
 144           MARC::Record->new_from_usmarc($possible_duplicate_record);
 145         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 146
 147         # FIXME :: why 2 $biblionumber ?
 148         if ($result) {
 149             push @results, $result->{'biblionumber'};
 150             push @results, $result->{'title'};
 151         }
 152     }
 153     return @results;
 154 }
 155
 156 =head2 SimpleSearch
 157
 158 ($error,$results) = SimpleSearch($query,@servers);
 159
 160 This function provides a simple search API on the bibliographic catalog
 161
 162 =over 2
 163
 164 =item C<input arg:>
 165
 166     * $query can be a simple keyword or a complete CCL query
 167     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 168
 169 =item C<Output arg:>
 170     * $error is a empty unless an error is detected
 171     * \@results is an array of records.
 172
 173 =item C<usage in the script:>
 174
 175 =back
 176
 177 my ($error, $marcresults) = SimpleSearch($query);
 178
 179 if (defined $error) {
 180     $template->param(query_error => $error);
 181     warn "error: ".$error;
 182     output_html_with_http_headers $input, $cookie, $template->output;
 183     exit;
 184 }
 185
 186 my $hits = scalar @$marcresults;
 187 my @results;
 188
 189 for(my $i=0;$i<$hits;$i++) {
 190     my %resultsloop;
 191     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 192     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 193
 194     #build the hash for the template.
 195     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 196     $resultsloop{title}           = $biblio->{'title'};
 197     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 198     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 199     $resultsloop{author}          = $biblio->{'author'};
 200     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 201     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 202
 203     push @results, \%resultsloop;
 204 }
 205
 206 $template->param(result=>\@results);
 207
 208 =cut
 209
 210 sub SimpleSearch {
 211     my $query = shift;
 212     if ( C4::Context->preference('NoZebra') ) {
 213         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 214         my $search_result =
 215           (      $result->{hits}
 216               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 217         return ( undef, $search_result );
 218     }
 219     else {
 220         my @servers = @_;
 221         my @results;
 222         my @tmpresults;
 223         my @zconns;
 224         return ( "No query entered", undef ) unless $query;
 225
 226         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 227         @servers = ("biblioserver") unless @servers;
 228
 229         # Initialize & Search Zebra
 230         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 231             eval {
 232                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 233                 $tmpresults[$i] =
 234                   $zconns[$i]
 235                   ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
 236
 237                 # error handling
 238                 my $error =
 239                     $zconns[$i]->errmsg() . " ("
 240                   . $zconns[$i]->errcode() . ") "
 241                   . $zconns[$i]->addinfo() . " "
 242                   . $zconns[$i]->diagset();
 243
 244                 return ( $error, undef ) if $zconns[$i]->errcode();
 245             };
 246             if ($@) {
 247
 248                 # caught a ZOOM::Exception
 249                 my $error =
 250                     $@->message() . " ("
 251                   . $@->code() . ") "
 252                   . $@->addinfo() . " "
 253                   . $@->diagset();
 254                 warn $error;
 255                 return ( $error, undef );
 256             }
 257         }
 258         my $hits = 0;
 259         my $ev;
 260         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 261             $ev = $zconns[ $i - 1 ]->last_event();
 262             if ( $ev == ZOOM::Event::ZEND ) {
 263                 $hits = $tmpresults[ $i - 1 ]->size();
 264             }
 265             if ( $hits > 0 ) {
 266                 for ( my $j = 0 ; $j < $hits ; $j++ ) {
 267                     my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
 268                     push @results, $record;
 269                 }
 270             }
 271             $hits = 0;
 272         }
 273         return ( undef, \@results );
 274     }
 275 }
 276
 277 =head2 getRecords
 278
 279 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 280
 281         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 282         $results_per_page, $offset,       $expanded_facet, $branches,
 283         $query_type,       $scan
 284     );
 285
 286 The all singing, all dancing, multi-server, asynchronous, scanning,
 287 searching, record nabbing, facet-building
 288
 289 See verbse embedded documentation.
 290
 291 =cut
 292
 293 sub getRecords {
 294     my (
 295         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 296         $results_per_page, $offset,       $expanded_facet, $branches,
 297         $query_type,       $scan
 298     ) = @_;
 299
 300     my @servers = @$servers_ref;
 301     my @sort_by = @$sort_by_ref;
 302
 303     # Initialize variables for the ZOOM connection and results object
 304     my $zconn;
 305     my @zconns;
 306     my @results;
 307     my $results_hashref = ();
 308
 309     # Initialize variables for the faceted results objects
 310     my $facets_counter = ();
 311     my $facets_info    = ();
 312     my $facets         = getFacets();
 313
 314     my @facets_loop
 315       ;    # stores the ref to array of hashes for template facets loop
 316
 317     ### LOOP THROUGH THE SERVERS
 318     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 319         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 320
 321 # perform the search, create the results objects
 322 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 323         my $query_to_use;
 324         if ( $servers[$i] =~ /biblioserver/ ) {
 325             $query_to_use = $koha_query;
 326         }
 327         else {
 328             $query_to_use = $simple_query;
 329         }
 330
 331         #$query_to_use = $simple_query if $scan;
 332         warn $simple_query if ( $scan and $DEBUG );
 333
 334         # Check if we've got a query_type defined, if so, use it
 335         eval {
 336             if ($query_type)
 337             {
 338                 if ( $query_type =~ /^ccl/ ) {
 339                     $query_to_use =~
 340                       s/\:/\=/g;    # change : to = last minute (FIXME)
 341                     $results[$i] =
 342                       $zconns[$i]->search(
 343                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 344                       );
 345                 }
 346                 elsif ( $query_type =~ /^cql/ ) {
 347                     $results[$i] =
 348                       $zconns[$i]->search(
 349                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 350                 }
 351                 elsif ( $query_type =~ /^pqf/ ) {
 352                     $results[$i] =
 353                       $zconns[$i]->search(
 354                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 355                 }
 356             }
 357             else {
 358                 if ($scan) {
 359                     $results[$i] =
 360                       $zconns[$i]->scan(
 361                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 362                       );
 363                 }
 364                 else {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 368                       );
 369                 }
 370             }
 371         };
 372         if ($@) {
 373             warn "WARNING: query problem with $query_to_use " . $@;
 374         }
 375
 376         # Concatenate the sort_by limits and pass them to the results object
 377         # Note: sort will override rank
 378         my $sort_by;
 379         foreach my $sort (@sort_by) {
 380             if ( $sort eq "author_az" ) {
 381                 $sort_by .= "1=1003 <i ";
 382             }
 383             elsif ( $sort eq "author_za" ) {
 384                 $sort_by .= "1=1003 >i ";
 385             }
 386             elsif ( $sort eq "popularity_asc" ) {
 387                 $sort_by .= "1=9003 <i ";
 388             }
 389             elsif ( $sort eq "popularity_dsc" ) {
 390                 $sort_by .= "1=9003 >i ";
 391             }
 392             elsif ( $sort eq "call_number_asc" ) {
 393                 $sort_by .= "1=20  <i ";
 394             }
 395             elsif ( $sort eq "call_number_dsc" ) {
 396                 $sort_by .= "1=20 >i ";
 397             }
 398             elsif ( $sort eq "pubdate_asc" ) {
 399                 $sort_by .= "1=31 <i ";
 400             }
 401             elsif ( $sort eq "pubdate_dsc" ) {
 402                 $sort_by .= "1=31 >i ";
 403             }
 404             elsif ( $sort eq "acqdate_asc" ) {
 405                 $sort_by .= "1=32 <i ";
 406             }
 407             elsif ( $sort eq "acqdate_dsc" ) {
 408                 $sort_by .= "1=32 >i ";
 409             }
 410             elsif ( $sort eq "title_az" ) {
 411                 $sort_by .= "1=4 <i ";
 412             }
 413             elsif ( $sort eq "title_za" ) {
 414                 $sort_by .= "1=4 >i ";
 415             }
 416         }
 417         if ($sort_by) {
 418             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 419                 warn "WARNING sort $sort_by failed";
 420             }
 421         }
 422     }    # finished looping through servers
 423
 424     # The big moment: asynchronously retrieve results from all servers
 425     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 426         my $ev = $zconns[ $i - 1 ]->last_event();
 427         if ( $ev == ZOOM::Event::ZEND ) {
 428             next unless $results[ $i - 1 ];
 429             my $size = $results[ $i - 1 ]->size();
 430             if ( $size > 0 ) {
 431                 my $results_hash;
 432
 433                 # loop through the results
 434                 $results_hash->{'hits'} = $size;
 435                 my $times;
 436                 if ( $offset + $results_per_page <= $size ) {
 437                     $times = $offset + $results_per_page;
 438                 }
 439                 else {
 440                     $times = $size;
 441                 }
 442                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 443                     my $records_hash;
 444                     my $record;
 445                     my $facet_record;
 446
 447                     ## Check if it's an index scan
 448                     if ($scan) {
 449                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 450
 451                  # here we create a minimal MARC record and hand it off to the
 452                  # template just like a normal result ... perhaps not ideal, but
 453                  # it works for now
 454                         my $tmprecord = MARC::Record->new();
 455                         $tmprecord->encoding('UTF-8');
 456                         my $tmptitle;
 457                         my $tmpauthor;
 458
 459                 # the minimal record in author/title (depending on MARC flavour)
 460                         if ( C4::Context->preference("marcflavour") eq
 461                             "UNIMARC" )
 462                         {
 463                             $tmptitle = MARC::Field->new(
 464                                 '200', ' ', ' ',
 465                                 a => $term,
 466                                 f => $occ
 467                             );
 468                         }
 469                         else {
 470                             $tmptitle =
 471                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 472                             $tmpauthor =
 473                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 474                         }
 475                         $tmprecord->append_fields($tmptitle);
 476                         $tmprecord->append_fields($tmpauthor);
 477                         $results_hash->{'RECORDS'}[$j] =
 478                           $tmprecord->as_usmarc();
 479                     }
 480
 481                     # not an index scan
 482                     else {
 483                         $record = $results[ $i - 1 ]->record($j)->raw();
 484
 485                         # warn "RECORD $j:".$record;
 486                         $results_hash->{'RECORDS'}[$j] = $record;
 487
 488             # Fill the facets while we're looping, but only for the biblioserver
 489                         $facet_record = MARC::Record->new_from_usmarc($record)
 490                           if $servers[ $i - 1 ] =~ /biblioserver/;
 491
 492                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 493                         if ($facet_record) {
 494                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 495
 496                                 if ( $facets->[$k] ) {
 497                                     my @fields;
 498                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 499                                     {
 500                                         push @fields,
 501                                           $facet_record->field($tag);
 502                                     }
 503                                     for my $field (@fields) {
 504                                         my @subfields = $field->subfields();
 505                                         for my $subfield (@subfields) {
 506                                             my ( $code, $data ) = @$subfield;
 507                                             if ( $code eq
 508                                                 $facets->[$k]->{'subfield'} )
 509                                             {
 510                                                 $facets_counter->{ $facets->[$k]
 511                                                       ->{'link_value'} }
 512                                                   ->{$data}++;
 513                                             }
 514                                         }
 515                                     }
 516                                     $facets_info->{ $facets->[$k]
 517                                           ->{'link_value'} }->{'label_value'} =
 518                                       $facets->[$k]->{'label_value'};
 519                                     $facets_info->{ $facets->[$k]
 520                                           ->{'link_value'} }->{'expanded'} =
 521                                       $facets->[$k]->{'expanded'};
 522                                 }
 523                             }
 524                         }
 525                     }
 526                 }
 527                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 528             }
 529
 530             # warn "connection ", $i-1, ": $size hits";
 531             # warn $results[$i-1]->record(0)->render() if $size > 0;
 532
 533             # BUILD FACETS
 534             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 535                 for my $link_value (
 536                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 537                     keys %$facets_counter )
 538                 {
 539                     my $expandable;
 540                     my $number_of_facets;
 541                     my @this_facets_array;
 542                     for my $one_facet (
 543                         sort {
 544                             $facets_counter->{$link_value}
 545                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 546                         } keys %{ $facets_counter->{$link_value} }
 547                       )
 548                     {
 549                         $number_of_facets++;
 550                         if (   ( $number_of_facets < 6 )
 551                             || ( $expanded_facet eq $link_value )
 552                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 553                         {
 554
 555                       # Sanitize the link value ), ( will cause errors with CCL,
 556                             my $facet_link_value = $one_facet;
 557                             $facet_link_value =~ s/(\(|\))/ /g;
 558
 559                             # fix the length that will display in the label,
 560                             my $facet_label_value = $one_facet;
 561                             $facet_label_value =
 562                               substr( $one_facet, 0, 20 ) . "..."
 563                               unless length($facet_label_value) <= 20;
 564
 565                             # if it's a branch, label by the name, not the code,
 566                             if ( $link_value =~ /branch/ ) {
 567                                 $facet_label_value =
 568                                   $branches->{$one_facet}->{'branchname'};
 569                             }
 570
 571                 # but we're down with the whole label being in the link's title.
 572                             my $facet_title_value = $one_facet;
 573
 574                             push @this_facets_array,
 575                               (
 576                                 {
 577                                     facet_count =>
 578                                       $facets_counter->{$link_value}
 579                                       ->{$one_facet},
 580                                     facet_label_value => $facet_label_value,
 581                                     facet_title_value => $facet_title_value,
 582                                     facet_link_value  => $facet_link_value,
 583                                     type_link_value   => $link_value,
 584                                 },
 585                               );
 586                         }
 587                     }
 588
 589                     # handle expanded option
 590                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 591                         $expandable = 1
 592                           if ( ( $number_of_facets > 6 )
 593                             && ( $expanded_facet ne $link_value ) );
 594                     }
 595                     push @facets_loop,
 596                       (
 597                         {
 598                             type_link_value => $link_value,
 599                             type_id         => $link_value . "_id",
 600                             type_label =>
 601                               $facets_info->{$link_value}->{'label_value'},
 602                             facets     => \@this_facets_array,
 603                             expandable => $expandable,
 604                             expand     => $link_value,
 605                         }
 606                       );
 607                 }
 608             }
 609         }
 610     }
 611     return ( undef, $results_hashref, \@facets_loop );
 612 }
 613
 614 use C4::Search::PazPar2;
 615 use XML::Simple;
 616 use Data::Dumper;
 617 sub pazGetRecords {
 618     my (
 619         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 620         $results_per_page, $offset,       $expanded_facet, $branches,
 621         $query_type,       $scan
 622     ) = @_;
 623
 624     my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
 625     $paz->init();
 626     $paz->search($simple_query);
 627     sleep 1;
 628
 629     # do results
 630     my $results_hashref = {};
 631     my $stats = XMLin($paz->stat);
 632     $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
 633     my $results = XMLin($paz->show($offset, $results_per_page), forcearray => 1);
 634     #die Dumper($results);
 635     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 636         warn "hit";
 637         my $recid = $hit->{recid}->[0];
 638         #if ($recid =~ /[\200-\377]/) {
 639         if ($recid =~ /sodot/) {
 640             #die "bad $recid\n";
 641             #probably do not want non-ASCII in record ID
 642             last HIT;
 643         }
 644         my $count = 1;
 645         if (exists $hit->{count}) {
 646             $count = $hit->{count}->[0];
 647         }
 648         #die $count;
 649         for (my $i = 0; $i < $count; $i++) {
 650             warn "look for $recid offset = $i";
 651             my $rec = $paz->record($recid, $i);
 652             warn "got record $i";
 653             push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $paz->record($recid, $i);
 654         }
 655     }
 656     warn "past hits";
 657
 658     # pass through facets
 659     my $termlist_xml = $paz->termlist('author,subject');
 660     my $terms = XMLin($termlist_xml, forcearray => 1);
 661     my @facets_loop = ();
 662     foreach my $list (sort keys %{ $terms->{'list'} }) {
 663         my @facets = ();
 664         foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 665             push @facets, {
 666                 facet_label_value => $facet->{'name'}->[0],
 667             };
 668         }
 669         push @facets_loop, ( {
 670             type_label => $list,
 671             facets => \@facets,
 672         } );
 673     }
 674
 675     return ( undef, $results_hashref, \@facets_loop );
 676 }
 677
 678 # STOPWORDS
 679 sub _remove_stopwords {
 680     my ( $operand, $index ) = @_;
 681     my @stopwords_removed;
 682
 683     # phrase and exact-qualified indexes shouldn't have stopwords removed
 684     if ( $index !~ m/phr|ext/ ) {
 685
 686 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 687 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 688 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 689 #       is a stopword, we'd get "çon" and wouldn't find anything...
 690         foreach ( keys %{ C4::Context->stopwords } ) {
 691             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 692             if ( $operand =~
 693                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
 694             {
 695                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 696                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 697                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 698                 push @stopwords_removed, $_;
 699             }
 700         }
 701     }
 702     return ( $operand, \@stopwords_removed );
 703 }
 704
 705 # TRUNCATION
 706 sub _detect_truncation {
 707     my ( $operand, $index ) = @_;
 708     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 709         @regexpr );
 710     $operand =~ s/^ //g;
 711     my @wordlist = split( /\s/, $operand );
 712     foreach my $word (@wordlist) {
 713         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 714             push @rightlefttruncated, $word;
 715         }
 716         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 717             push @lefttruncated, $word;
 718         }
 719         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 720             push @righttruncated, $word;
 721         }
 722         elsif ( index( $word, "*" ) < 0 ) {
 723             push @nontruncated, $word;
 724         }
 725         else {
 726             push @regexpr, $word;
 727         }
 728     }
 729     return (
 730         \@nontruncated,       \@righttruncated, \@lefttruncated,
 731         \@rightlefttruncated, \@regexpr
 732     );
 733 }
 734
 735 # STEMMING
 736 sub _build_stemmed_operand {
 737     my ($operand) = @_;
 738     my $stemmed_operand;
 739
 740 # FIXME: the locale should be set based on the user's language and/or search choice
 741     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 742
 743 # FIXME: these should be stored in the db so the librarian can modify the behavior
 744     $stemmer->add_exceptions(
 745         {
 746             'and' => 'and',
 747             'or'  => 'or',
 748             'not' => 'not',
 749         }
 750     );
 751     my @words = split( / /, $operand );
 752     my $stems = $stemmer->stem(@words);
 753     for my $stem (@$stems) {
 754         $stemmed_operand .= "$stem";
 755         $stemmed_operand .= "?"
 756           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 757         $stemmed_operand .= " ";
 758     }
 759     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 760     return $stemmed_operand;
 761 }
 762
 763 # FIELD WEIGHTING
 764 sub _build_weighted_query {
 765
 766 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 767 # pretty well but could work much better if we had a smarter query parser
 768     my ( $operand, $stemmed_operand, $index ) = @_;
 769     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 770     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 771     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 772
 773     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 774
 775     # Keyword, or, no index specified
 776     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 777         $weighted_query .=
 778           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 779         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 780         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 781           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 782           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 783         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 784           if $fuzzy_enabled;    # add fuzzy, word list
 785         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 786           if ( $stemming and $stemmed_operand )
 787           ;                     # add stemming, right truncation
 788         $weighted_query .= " or wrdl,r9=\"$operand\"";
 789
 790         # embedded sorting: 0 a-z; 1 z-a
 791         # $weighted_query .= ") or (sort1,aut=1";
 792     }
 793
 794     # Barcode searches should skip this process
 795     elsif ( $index eq 'bc' ) {
 796         $weighted_query .= "bc=\"$operand\"";
 797     }
 798
 799     # Authority-number searches should skip this process
 800     elsif ( $index eq 'an' ) {
 801         $weighted_query .= "an=\"$operand\"";
 802     }
 803
 804     # If the index already has more than one qualifier, wrap the operand
 805     # in quotes and pass it back (assumption is that the user knows what they
 806     # are doing and won't appreciate us mucking up their query
 807     elsif ( $index =~ ',' ) {
 808         $weighted_query .= " $index=\"$operand\"";
 809     }
 810
 811     #TODO: build better cases based on specific search indexes
 812     else {
 813         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 814           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 815         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 816         $weighted_query .=
 817           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 818     }
 819
 820     $weighted_query .= "))";                       # close rank specification
 821     return $weighted_query;
 822 }
 823
 824 =head2 buildQuery
 825
 826 ( $error, $query,
 827 $simple_query, $query_cgi,
 828 $query_desc, $limit,
 829 $limit_cgi, $limit_desc,
 830 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 831
 832 Build queries and limits in CCL, CGI, Human,
 833 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 834
 835 See verbose embedded documentation.
 836
 837
 838 =cut
 839
 840 sub buildQuery {
 841     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 842
 843     warn "---------"        if $DEBUG;
 844     warn "Enter buildQuery" if $DEBUG;
 845     warn "---------"        if $DEBUG;
 846
 847     # dereference
 848     my @operators = @$operators if $operators;
 849     my @indexes   = @$indexes   if $indexes;
 850     my @operands  = @$operands  if $operands;
 851     my @limits    = @$limits    if $limits;
 852     my @sort_by   = @$sort_by   if $sort_by;
 853
 854     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 855     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 856     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 857     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 858     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 859
 860     # no stemming/weight/fuzzy in NoZebra
 861     if ( C4::Context->preference("NoZebra") ) {
 862         $stemming      = 0;
 863         $weight_fields = 0;
 864         $fuzzy_enabled = 0;
 865     }
 866
 867     my $query        = $operands[0];
 868     my $simple_query = $operands[0];
 869
 870     # initialize the variables we're passing back
 871     my $query_cgi;
 872     my $query_desc;
 873     my $query_type;
 874
 875     my $limit;
 876     my $limit_cgi;
 877     my $limit_desc;
 878
 879     my $stopwords_removed;    # flag to determine if stopwords have been removed
 880
 881 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 882 # DIAGNOSTIC ONLY!!
 883     if ( $query =~ /^ccl=/ ) {
 884         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 885     }
 886     if ( $query =~ /^cql=/ ) {
 887         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 888     }
 889     if ( $query =~ /^pqf=/ ) {
 890         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 891     }
 892
 893     # pass nested queries directly
 894     # FIXME: need better handling of some of these variables in this case
 895     if ( $query =~ /(\(|\))/ ) {
 896         return (
 897             undef,              $query, $simple_query, $query_cgi,
 898             $query,             $limit, $limit_cgi,    $limit_desc,
 899             $stopwords_removed, 'ccl'
 900         );
 901     }
 902
 903 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 904 # query operands and indexes and add stemming, truncation, field weighting, etc.
 905 # Once we do so, we'll end up with a value in $query, just like if we had an
 906 # incoming $query from the user
 907     else {
 908         $query = ""
 909           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 910         my $previous_operand
 911           ;    # a flag used to keep track if there was a previous query
 912                # if there was, we can apply the current operator
 913                # for every operand
 914         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 915
 916             # COMBINE OPERANDS, INDEXES AND OPERATORS
 917             if ( $operands[$i] ) {
 918
 919               # A flag to determine whether or not to add the index to the query
 920                 my $indexes_set;
 921
 922 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 923                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 924                     $weight_fields    = 0;
 925                     $stemming         = 0;
 926                     $remove_stopwords = 0;
 927                 }
 928                 my $operand = $operands[$i];
 929                 my $index   = $indexes[$i];
 930
 931                 # Add index-specific attributes
 932                 # Date of Publication
 933                 if ( $index eq 'yr' ) {
 934                     $index .= ",st-numeric";
 935                     $indexes_set++;
 936                     (
 937                         $stemming,      $auto_truncation,
 938                         $weight_fields, $fuzzy_enabled,
 939                         $remove_stopwords
 940                     ) = ( 0, 0, 0, 0, 0 );
 941                 }
 942
 943                 # Date of Acquisition
 944                 elsif ( $index eq 'acqdate' ) {
 945                     $index .= ",st-date-normalized";
 946                     $indexes_set++;
 947                     (
 948                         $stemming,      $auto_truncation,
 949                         $weight_fields, $fuzzy_enabled,
 950                         $remove_stopwords
 951                     ) = ( 0, 0, 0, 0, 0 );
 952                 }
 953
 954                 # Set default structure attribute (word list)
 955                 my $struct_attr;
 956                 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 957                     $struct_attr = ",wrdl";
 958                 }
 959
 960                 # Some helpful index variants
 961                 my $index_plus       = $index . $struct_attr . ":" if $index;
 962                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 963
 964                 # Remove Stopwords
 965                 if ($remove_stopwords) {
 966                     ( $operand, $stopwords_removed ) =
 967                       _remove_stopwords( $operand, $index );
 968                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 969                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 970                       if ( $stopwords_removed && $DEBUG );
 971                 }
 972
 973                 # Detect Truncation
 974                 my ( $nontruncated, $righttruncated, $lefttruncated,
 975                     $rightlefttruncated, $regexpr );
 976                 my $truncated_operand;
 977                 (
 978                     $nontruncated, $righttruncated, $lefttruncated,
 979                     $rightlefttruncated, $regexpr
 980                 ) = _detect_truncation( $operand, $index );
 981                 warn
 982 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 983                   if $DEBUG;
 984
 985                 # Apply Truncation
 986                 if (
 987                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 988                     scalar(@$rightlefttruncated) > 0 )
 989                 {
 990
 991                # Don't field weight or add the index to the query, we do it here
 992                     $indexes_set = 1;
 993                     undef $weight_fields;
 994                     my $previous_truncation_operand;
 995                     if ( scalar(@$nontruncated) > 0 ) {
 996                         $truncated_operand .= "$index_plus @$nontruncated ";
 997                         $previous_truncation_operand = 1;
 998                     }
 999                     if ( scalar(@$righttruncated) > 0 ) {
1000                         $truncated_operand .= "and "
1001                           if $previous_truncation_operand;
1002                         $truncated_operand .=
1003                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1004                         $previous_truncation_operand = 1;
1005                     }
1006                     if ( scalar(@$lefttruncated) > 0 ) {
1007                         $truncated_operand .= "and "
1008                           if $previous_truncation_operand;
1009                         $truncated_operand .=
1010                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1011                         $previous_truncation_operand = 1;
1012                     }
1013                     if ( scalar(@$rightlefttruncated) > 0 ) {
1014                         $truncated_operand .= "and "
1015                           if $previous_truncation_operand;
1016                         $truncated_operand .=
1017                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1018                         $previous_truncation_operand = 1;
1019                     }
1020                 }
1021                 $operand = $truncated_operand if $truncated_operand;
1022                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1023
1024                 # Handle Stemming
1025                 my $stemmed_operand;
1026                 $stemmed_operand = _build_stemmed_operand($operand)
1027                   if $stemming;
1028                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1029
1030                 # Handle Field Weighting
1031                 my $weighted_operand;
1032                 $weighted_operand =
1033                   _build_weighted_query( $operand, $stemmed_operand, $index )
1034                   if $weight_fields;
1035                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1036                 $operand = $weighted_operand if $weight_fields;
1037                 $indexes_set = 1 if $weight_fields;
1038
1039                 # If there's a previous operand, we need to add an operator
1040                 if ($previous_operand) {
1041
1042                     # User-specified operator
1043                     if ( $operators[ $i - 1 ] ) {
1044                         $query     .= " $operators[$i-1] ";
1045                         $query     .= " $index_plus " unless $indexes_set;
1046                         $query     .= " $operand";
1047                         $query_cgi .= "&op=$operators[$i-1]";
1048                         $query_cgi .= "&idx=$index" if $index;
1049                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1050                         $query_desc .=
1051                           " $operators[$i-1] $index_plus $operands[$i]";
1052                     }
1053
1054                     # Default operator is and
1055                     else {
1056                         $query      .= " and ";
1057                         $query      .= "$index_plus " unless $indexes_set;
1058                         $query      .= "$operand";
1059                         $query_cgi  .= "&op=and&idx=$index" if $index;
1060                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1061                         $query_desc .= " and $index_plus $operands[$i]";
1062                     }
1063                 }
1064
1065                 # There isn't a pervious operand, don't need an operator
1066                 else {
1067
1068                     # Field-weighted queries already have indexes set
1069                     $query .= " $index_plus " unless $indexes_set;
1070                     $query .= $operand;
1071                     $query_desc .= " $index_plus $operands[$i]";
1072                     $query_cgi  .= "&idx=$index" if $index;
1073                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1074                     $previous_operand = 1;
1075                 }
1076             }    #/if $operands
1077         }    # /for
1078     }
1079     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1080
1081     # add limits
1082     my $group_OR_limits;
1083     my $availability_limit;
1084     foreach my $this_limit (@limits) {
1085         if ( $this_limit =~ /available/ ) {
1086
1087 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1088 # In English:
1089 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1090             $availability_limit .=
1091 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1092             $limit_cgi  .= "&limit=available";
1093             $limit_desc .= "";
1094         }
1095
1096         # group_OR_limits, prefixed by mc-
1097         # OR every member of the group
1098         elsif ( $this_limit =~ /mc/ ) {
1099             $group_OR_limits .= " or " if $group_OR_limits;
1100             $limit_desc      .= " or " if $group_OR_limits;
1101             $group_OR_limits .= "$this_limit";
1102             $limit_cgi       .= "&limit=$this_limit";
1103             $limit_desc      .= " $this_limit";
1104         }
1105
1106         # Regular old limits
1107         else {
1108             $limit .= " and " if $limit || $query;
1109             $limit      .= "$this_limit";
1110             $limit_cgi  .= "&limit=$this_limit";
1111             $limit_desc .= " $this_limit";
1112         }
1113     }
1114     if ($group_OR_limits) {
1115         $limit .= " and " if ( $query || $limit );
1116         $limit .= "($group_OR_limits)";
1117     }
1118     if ($availability_limit) {
1119         $limit .= " and " if ( $query || $limit );
1120         $limit .= "($availability_limit)";
1121     }
1122
1123     # Normalize the query and limit strings
1124     $query =~ s/:/=/g;
1125     $limit =~ s/:/=/g;
1126     for ( $query, $query_desc, $limit, $limit_desc ) {
1127         $_ =~ s/  / /g;    # remove extra spaces
1128         $_ =~ s/^ //g;     # remove any beginning spaces
1129         $_ =~ s/ $//g;     # remove any ending spaces
1130         $_ =~ s/==/=/g;    # remove double == from query
1131     }
1132     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1133
1134     for ($query_cgi,$simple_query) {
1135         $_ =~ s/"//g;
1136     }
1137     # append the limit to the query
1138     $query .= " " . $limit;
1139
1140     # Warnings if DEBUG
1141     if ($DEBUG) {
1142         warn "QUERY:" . $query;
1143         warn "QUERY CGI:" . $query_cgi;
1144         warn "QUERY DESC:" . $query_desc;
1145         warn "LIMIT:" . $limit;
1146         warn "LIMIT CGI:" . $limit_cgi;
1147         warn "LIMIT DESC:" . $limit_desc;
1148         warn "---------";
1149         warn "Leave buildQuery";
1150         warn "---------";
1151     }
1152     return (
1153         undef,              $query, $simple_query, $query_cgi,
1154         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1155         $stopwords_removed, $query_type
1156     );
1157 }
1158
1159 =head2 searchResults
1160
1161 Format results in a form suitable for passing to the template
1162
1163 =cut
1164
1165 # IMO this subroutine is pretty messy still -- it's responsible for
1166 # building the HTML output for the template
1167 sub searchResults {
1168     my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1169     my $dbh = C4::Context->dbh;
1170     my $toggle;
1171     my $even = 1;
1172     my @newresults;
1173
1174     # add search-term highlighting via <span>s on the search terms
1175     my $span_terms_hashref;
1176     for my $span_term ( split( / /, $searchdesc ) ) {
1177         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1178         $span_terms_hashref->{$span_term}++;
1179     }
1180
1181     #Build branchnames hash
1182     #find branchname
1183     #get branch information.....
1184     my %branches;
1185     my $bsth =
1186       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1187       ;    # FIXME : use C4::Koha::GetBranches
1188     $bsth->execute();
1189     while ( my $bdata = $bsth->fetchrow_hashref ) {
1190         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1191     }
1192     my %locations;
1193     my $lsch =
1194       $dbh->prepare(
1195 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1196       );
1197     $lsch->execute();
1198     while ( my $ldata = $lsch->fetchrow_hashref ) {
1199         $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1200     }
1201
1202     #Build itemtype hash
1203     #find itemtype & itemtype image
1204     my %itemtypes;
1205     $bsth =
1206       $dbh->prepare(
1207         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1208       );
1209     $bsth->execute();
1210     while ( my $bdata = $bsth->fetchrow_hashref ) {
1211         $itemtypes{ $bdata->{'itemtype'} }->{description} =
1212           $bdata->{'description'};
1213         $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1214         $itemtypes{ $bdata->{'itemtype'} }->{summary}  = $bdata->{'summary'};
1215         $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1216           $bdata->{'notforloan'};
1217     }
1218
1219     #search item field code
1220     my $sth =
1221       $dbh->prepare(
1222 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1223       );
1224     $sth->execute;
1225     my ($itemtag) = $sth->fetchrow;
1226
1227     # get notforloan authorised value list
1228     $sth =
1229       $dbh->prepare(
1230 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1231       );
1232     $sth->execute;
1233     my ($notforloan_authorised_value) = $sth->fetchrow;
1234
1235     ## find column names of items related to MARC
1236     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1237     $sth2->execute;
1238     my %subfieldstosearch;
1239     while ( ( my $column ) = $sth2->fetchrow ) {
1240         my ( $tagfield, $tagsubfield ) =
1241           &GetMarcFromKohaField( "items." . $column, "" );
1242         $subfieldstosearch{$column} = $tagsubfield;
1243     }
1244
1245     # handle which records to actually retrieve
1246     my $times;
1247     if ( $hits && $offset + $results_per_page <= $hits ) {
1248         $times = $offset + $results_per_page;
1249     }
1250     else {
1251         $times = $hits;
1252     }
1253
1254     # loop through all of the records we've retrieved
1255     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1256         my $marcrecord;
1257         $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1258         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1259         $oldbiblio->{result_number} = $i + 1;
1260
1261         # add imageurl to itemtype if there is one
1262         if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1263             $oldbiblio->{imageurl} =
1264               $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1265             $oldbiblio->{description} =
1266               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1267         }
1268         else {
1269             $oldbiblio->{imageurl} =
1270               getitemtypeimagesrc() . "/"
1271               . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1272               if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1273             $oldbiblio->{description} =
1274               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1275         }
1276
1277  # Build summary if there is one (the summary is defined in the itemtypes table)
1278  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1279         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1280             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1281             my @fields  = $marcrecord->fields();
1282             foreach my $field (@fields) {
1283                 my $tag      = $field->tag();
1284                 my $tagvalue = $field->as_string();
1285                 $summary =~
1286                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1287                 unless ( $tag < 10 ) {
1288                     my @subf = $field->subfields;
1289                     for my $i ( 0 .. $#subf ) {
1290                         my $subfieldcode  = $subf[$i][0];
1291                         my $subfieldvalue = $subf[$i][1];
1292                         my $tagsubf       = $tag . $subfieldcode;
1293                         $summary =~
1294 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1295                     }
1296                 }
1297             }
1298             # FIXME: yuk
1299             $summary =~ s/\[(.*?)]//g;
1300             $summary =~ s/\n/<br>/g;
1301             $oldbiblio->{summary} = $summary;
1302         }
1303
1304 # Add search-term highlighting to the whole record where they match using <span>s
1305         if (C4::Context->preference("OpacHighlightedWords")){
1306             my $searchhighlightblob;
1307             for my $highlight_field ( $marcrecord->fields ) {
1308
1309     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1310                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1311                 for my $subfield ($highlight_field->subfields()) {
1312                     my $match;
1313                     next if $subfield->[0] eq '9';
1314                     my $field = $subfield->[1];
1315                     for my $term ( keys %$span_terms_hashref ) {
1316                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1317                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1318                         $match++;
1319                         }
1320                     }
1321                     $searchhighlightblob .= $field . " ... " if $match;
1322                 }
1323
1324             }
1325             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1326             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1327         }
1328 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1329         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1330
1331         # Add search-term highlighting to the title, subtitle, etc. fields
1332         for my $term ( keys %$span_terms_hashref ) {
1333             my $old_term = $term;
1334             if ( length($term) > 3 ) {
1335                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1336                 $oldbiblio->{'title'} =~
1337                   s/$term/<span class=\"term\">$&<\/span>/gi;
1338                 $oldbiblio->{'subtitle'} =~
1339                   s/$term/<span class=\"term\">$&<\/span>/gi;
1340                 $oldbiblio->{'author'} =~
1341                   s/$term/<span class=\"term\">$&<\/span>/gi;
1342                 $oldbiblio->{'publishercode'} =~
1343                   s/$term/<span class=\"term\">$&<\/span>/gi;
1344                 $oldbiblio->{'place'} =~
1345                   s/$term/<span class=\"term\">$&<\/span>/gi;
1346                 $oldbiblio->{'pages'} =~
1347                   s/$term/<span class=\"term\">$&<\/span>/gi;
1348                 $oldbiblio->{'notes'} =~
1349                   s/$term/<span class=\"term\">$&<\/span>/gi;
1350                 $oldbiblio->{'size'} =~
1351                   s/$term/<span class=\"term\">$&<\/span>/gi;
1352             }
1353         }
1354
1355         # FIXME:
1356         # surely there's a better way to handle this
1357         if ( $i % 2 ) {
1358             $toggle = "#ffffcc";
1359         }
1360         else {
1361             $toggle = "white";
1362         }
1363         $oldbiblio->{'toggle'} = $toggle;
1364
1365         # Pull out the items fields
1366         my @fields = $marcrecord->field($itemtag);
1367
1368         # Setting item statuses for display
1369         my @available_items_loop;
1370         my @onloan_items_loop;
1371         my @other_items_loop;
1372
1373         my $available_items;
1374         my $onloan_items;
1375         my $other_items;
1376
1377         my $ordered_count     = 0;
1378         my $available_count   = 0;
1379         my $onloan_count      = 0;
1380         my $longoverdue_count = 0;
1381         my $other_count       = 0;
1382         my $wthdrawn_count    = 0;
1383         my $itemlost_count    = 0;
1384         my $itembinding_count = 0;
1385         my $itemdamaged_count = 0;
1386         my $can_place_holds   = 0;
1387         my $items_count       = scalar(@fields);
1388         my $items_counter;
1389         my $maxitems =
1390           ( C4::Context->preference('maxItemsinSearchResults') )
1391           ? C4::Context->preference('maxItemsinSearchResults') - 1
1392           : 1;
1393
1394         # loop through every item
1395         foreach my $field (@fields) {
1396             my $item;
1397             $items_counter++;
1398
1399             # populate the items hash
1400             foreach my $code ( keys %subfieldstosearch ) {
1401                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1402             }
1403
1404             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1405             if ( $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} ) {
1406                 $item->{'branchname'} = $branches{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} };
1407             }
1408             # Last resort
1409             elsif ( $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'holdingbranch':'homebranch'} ) {
1410                 $item->{'branchname'} = $branches{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'holdingbranch':'homebranch'} };
1411             }
1412
1413 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1414             if ( $item->{onloan} ) {
1415                 $onloan_count++;
1416                 $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{due_date} = format_date( $item->{onloan} );
1417                 $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{count}++ if $item->{'homebranch'};
1418                 $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{branchname} = $item->{'branchname'};
1419                 $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{location} = $locations{ $item->{location} };
1420                 $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{itemcallnumber} = $item->{itemcallnumber};
1421         $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1422                 # if something's checked out and lost, mark it as 'long overdue'
1423                 if ( $item->{itemlost} ) {
1424                     $onloan_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1425                     $longoverdue_count++;
1426                 }
1427
1428                 # can place holds as long as this item isn't lost
1429                 else {
1430                     $can_place_holds = 1;
1431                 }
1432             }
1433
1434          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1435             else {
1436
1437                 # item is on order
1438                 if ( $item->{notforloan} == -1 ) {
1439                     $ordered_count++;
1440                 }
1441
1442                 # item is withdrawn, lost or damaged
1443                 if (   $item->{wthdrawn}
1444                     || $item->{itemlost}
1445                     || $item->{damaged}
1446                     || $item->{notforloan} )
1447                 {
1448                     $wthdrawn_count++    if $item->{wthdrawn};
1449                     $itemlost_count++    if $item->{itemlost};
1450                     $itemdamaged_count++ if $item->{damaged};
1451                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1452                     $other_count++;
1453
1454                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1455                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1456                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1457                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1458                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1459                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1460                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1461                     $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1462             $other_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1463                 }
1464
1465                 # item is available
1466                 else {
1467                     $can_place_holds = 1;
1468                     $available_count++;
1469                     $available_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1470                     $available_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1471                     $available_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1472                     $available_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1473             $available_items->{ $item->{C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch'?'homebranch':'holdingbranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1474                 }
1475             }
1476         }    # notforloan, item level and biblioitem level
1477         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1478         $maxitems =
1479           ( C4::Context->preference('maxItemsinSearchResults') )
1480           ? C4::Context->preference('maxItemsinSearchResults') - 1
1481           : 1;
1482         for my $key ( sort keys %$onloan_items ) {
1483             $onloanitemscount++;
1484             push @onloan_items_loop, $onloan_items->{$key}
1485               unless $onloanitemscount > $maxitems;
1486         }
1487         for my $key ( sort keys %$other_items ) {
1488             $otheritemscount++;
1489             push @other_items_loop, $other_items->{$key}
1490               unless $otheritemscount > $maxitems;
1491         }
1492         for my $key ( sort keys %$available_items ) {
1493             $availableitemscount++;
1494             push @available_items_loop, $available_items->{$key}
1495               unless $availableitemscount > $maxitems;
1496         }
1497
1498 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1499         $can_place_holds = 0
1500           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1501         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1502         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1503         $oldbiblio->{items_count}          = $items_count;
1504         $oldbiblio->{available_items_loop} = \@available_items_loop;
1505         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1506         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1507         $oldbiblio->{availablecount}       = $available_count;
1508         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1509         $oldbiblio->{onloancount}          = $onloan_count;
1510         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1511         $oldbiblio->{othercount}           = $other_count;
1512         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1513         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1514         $oldbiblio->{itemlostcount}        = $itemlost_count;
1515         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1516         $oldbiblio->{orderedcount}         = $ordered_count;
1517         $oldbiblio->{isbn} =~
1518           s/-//g;    # deleting - in isbn to enable amazon content
1519         push( @newresults, $oldbiblio );
1520     }
1521     return @newresults;
1522 }
1523
1524 #----------------------------------------------------------------------
1525 #
1526 # Non-Zebra GetRecords#
1527 #----------------------------------------------------------------------
1528
1529 =head2 NZgetRecords
1530
1531   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1532
1533 =cut
1534
1535 sub NZgetRecords {
1536     my (
1537         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1538         $results_per_page, $offset,       $expanded_facet, $branches,
1539         $query_type,       $scan
1540     ) = @_;
1541     warn "query =$query" if $DEBUG;
1542     my $result = NZanalyse($query);
1543     warn "results =$result" if $DEBUG;
1544     return ( undef,
1545         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1546         undef );
1547 }
1548
1549 =head2 NZanalyse
1550
1551   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1552   the list is built from an inverted index in the nozebra SQL table
1553   note that title is here only for convenience : the sorting will be very fast when requested on title
1554   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1555
1556 =cut
1557
1558 sub NZanalyse {
1559     my ( $string, $server ) = @_;
1560 #     warn "---------"       if $DEBUG;
1561     warn " NZanalyse" if $DEBUG;
1562 #     warn "---------"       if $DEBUG;
1563
1564  # $server contains biblioserver or authorities, depending on what we search on.
1565  #warn "querying : $string on $server";
1566     $server = 'biblioserver' unless $server;
1567
1568 # if we have a ", replace the content to discard temporarily any and/or/not inside
1569     my $commacontent;
1570     if ( $string =~ /"/ ) {
1571         $string =~ s/"(.*?)"/__X__/;
1572         $commacontent = $1;
1573         warn "commacontent : $commacontent" if $DEBUG;
1574     }
1575
1576 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1577 # then, call again NZanalyse with $left and $right
1578 # (recursive until we find a leaf (=> something without and/or/not)
1579 # delete repeated operator... Would then go in infinite loop
1580     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1581     }
1582
1583     #process parenthesis before.
1584     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1585         my $left     = $1;
1586         my $right    = $4;
1587         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1588         warn
1589 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1590           if $DEBUG;
1591         my $leftresult = NZanalyse( $left, $server );
1592         if ($operator) {
1593             my $rightresult = NZanalyse( $right, $server );
1594
1595             # OK, we have the results for right and left part of the query
1596             # depending of operand, intersect, union or exclude both lists
1597             # to get a result list
1598             if ( $operator eq ' and ' ) {
1599                 return NZoperatorAND($leftresult,$rightresult);
1600             }
1601             elsif ( $operator eq ' or ' ) {
1602
1603                 # just merge the 2 strings
1604                 return $leftresult . $rightresult;
1605             }
1606             elsif ( $operator eq ' not ' ) {
1607                 return NZoperatorNOT($leftresult,$rightresult);
1608             }
1609         }
1610         else {
1611 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1612             return $leftresult;
1613         }
1614     }
1615     warn "string :" . $string if $DEBUG;
1616     $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1617     my $left     = $1;
1618     my $right    = $3;
1619     my $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1620     warn "no parenthesis. left : $left operator: $operator right: $right"
1621       if $DEBUG;
1622
1623     # it's not a leaf, we have a and/or/not
1624     if ($operator) {
1625
1626         # reintroduce comma content if needed
1627         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1628         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1629         warn "node : $left / $operator / $right\n" if $DEBUG;
1630         my $leftresult  = NZanalyse( $left,  $server );
1631         my $rightresult = NZanalyse( $right, $server );
1632         warn " leftresult : $leftresult" if $DEBUG;
1633         warn " rightresult : $rightresult" if $DEBUG;
1634         # OK, we have the results for right and left part of the query
1635         # depending of operand, intersect, union or exclude both lists
1636         # to get a result list
1637         if ( $operator eq ' and ' ) {
1638             warn "NZAND";
1639             return NZoperatorAND($leftresult,$rightresult);
1640         }
1641         elsif ( $operator eq ' or ' ) {
1642
1643             # just merge the 2 strings
1644             return $leftresult . $rightresult;
1645         }
1646         elsif ( $operator eq ' not ' ) {
1647             return NZoperatorNOT($leftresult,$rightresult);
1648         }
1649         else {
1650
1651 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1652             die "error : operand unknown : $operator for $string";
1653         }
1654
1655         # it's a leaf, do the real SQL query and return the result
1656     }
1657     else {
1658         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1659         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1660         warn "leaf:$string" if $DEBUG;
1661
1662         # parse the string in in operator/operand/value again
1663         $string =~ /(.*)(>=|<=)(.*)/;
1664         my $left     = $1;
1665         my $operator = $2;
1666         my $right    = $3;
1667 #         warn "handling leaf... left:$left operator:$operator right:$right"
1668 #           if $DEBUG;
1669         unless ($operator) {
1670             $string =~ /(.*)(>|<|=)(.*)/;
1671             $left     = $1;
1672             $operator = $2;
1673             $right    = $3;
1674 #             warn
1675 # "handling unless (operator)... left:$left operator:$operator right:$right"
1676 #               if $DEBUG;
1677         }
1678         my $results;
1679
1680 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1681         $left =~ s/[, ].*$//;
1682
1683         # automatic replace for short operators
1684         $left = 'title'            if $left =~ '^ti$';
1685         $left = 'author'           if $left =~ '^au$';
1686         $left = 'publisher'        if $left =~ '^pb$';
1687         $left = 'subject'          if $left =~ '^su$';
1688         $left = 'koha-Auth-Number' if $left =~ '^an$';
1689         $left = 'keyword'          if $left =~ '^kw$';
1690         warn "handling leaf... left:$left operator:$operator right:$right";
1691         if ( $operator && $left ne 'keyword' ) {
1692
1693             #do a specific search
1694             my $dbh = C4::Context->dbh;
1695             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1696             my $sth =
1697               $dbh->prepare(
1698 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1699               );
1700             warn "$left / $operator / $right\n";
1701
1702             # split each word, query the DB and build the biblionumbers result
1703             #sanitizing leftpart
1704             $left =~ s/^\s+|\s+$//;
1705             foreach ( split / /, $right ) {
1706                 my $biblionumbers;
1707                 $_ =~ s/^\s+|\s+$//;
1708                 next unless $_;
1709                 warn "EXECUTE : $server, $left, $_";
1710                 $sth->execute( $server, $left, $_ )
1711                   or warn "execute failed: $!";
1712                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1713
1714 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1715 # otherwise, fill the result
1716                     $biblionumbers .= $line
1717                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1718                     warn "result : $value "
1719                       . ( $right  =~ /\d/ ) . "=="
1720                       . ( $value =~ /\D/?$line:"" );         #= $line";
1721                 }
1722
1723 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1724                 if ($results) {
1725                     warn "NZAND";
1726                     $results = NZoperatorAND($biblionumbers,$results);
1727                 }
1728                 else {
1729                     $results = $biblionumbers;
1730                 }
1731             }
1732         }
1733         else {
1734
1735       #do a complete search (all indexes), if index='kw' do complete search too.
1736             my $dbh = C4::Context->dbh;
1737             my $sth =
1738               $dbh->prepare(
1739 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1740               );
1741
1742             # split each word, query the DB and build the biblionumbers result
1743             foreach ( split / /, $string ) {
1744                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1745                 warn "search on all indexes on $_" if $DEBUG;
1746                 my $biblionumbers;
1747                 next unless $_;
1748                 $sth->execute( $server, $_ );
1749                 while ( my $line = $sth->fetchrow ) {
1750                     $biblionumbers .= $line;
1751                 }
1752
1753 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1754                 if ($results) {
1755                     $results = NZoperatorAND($biblionumbers,$results);
1756                 }
1757                 else {
1758                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1759                     $results = $biblionumbers;
1760                 }
1761             }
1762         }
1763         warn "return : $results for LEAF : $string" if $DEBUG;
1764         return $results;
1765     }
1766     warn "---------"       if $DEBUG;
1767     warn "Leave NZanalyse" if $DEBUG;
1768     warn "---------"       if $DEBUG;
1769 }
1770
1771 sub NZoperatorAND{
1772     my ($rightresult, $leftresult)=@_;
1773
1774     my @leftresult = split /;/, $leftresult;
1775     warn " @leftresult / $rightresult \n" if $DEBUG;
1776
1777     #             my @rightresult = split /;/,$leftresult;
1778     my $finalresult;
1779
1780 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1781 # the result is stored twice, to have the same weight for AND than OR.
1782 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1783 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1784     foreach (@leftresult) {
1785         my $value = $_;
1786         my $countvalue;
1787         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1788         if ( $rightresult =~ /$value-(\d+);/ ) {
1789             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1790             $finalresult .=
1791                 "$value-$countvalue;$value-$countvalue;";
1792         }
1793     }
1794     warn " $finalresult \n" if $DEBUG;
1795     return $finalresult;
1796 }
1797
1798 sub NZoperatorOR{
1799     my ($rightresult, $leftresult)=@_;
1800     return $rightresult.$leftresult;
1801 }
1802
1803 sub NZoperatorNOT{
1804     my ($rightresult, $leftresult)=@_;
1805
1806     my @leftresult = split /;/, $leftresult;
1807
1808     #             my @rightresult = split /;/,$leftresult;
1809     my $finalresult;
1810     foreach (@leftresult) {
1811         my $value=$_;
1812         $value=$1 if $value=~m/(.*)-\d+$/;
1813         unless ($rightresult =~ "$value-") {
1814             $finalresult .= "$_;";
1815         }
1816     }
1817     return $finalresult;
1818 }
1819
1820 =head2 NZorder
1821
1822   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1823
1824   TODO :: Description
1825
1826 =cut
1827
1828 sub NZorder {
1829     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1830     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1831
1832     # order title asc by default
1833     #     $ordering = '1=36 <i' unless $ordering;
1834     $results_per_page = 20 unless $results_per_page;
1835     $offset           = 0  unless $offset;
1836     my $dbh = C4::Context->dbh;
1837
1838     #
1839     # order by POPULARITY
1840     #
1841     if ( $ordering =~ /popularity/ ) {
1842         my %result;
1843         my %popularity;
1844
1845         # popularity is not in MARC record, it's builded from a specific query
1846         my $sth =
1847           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1848         foreach ( split /;/, $biblionumbers ) {
1849             my ( $biblionumber, $title ) = split /,/, $_;
1850             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1851             $sth->execute($biblionumber);
1852             my $popularity = $sth->fetchrow || 0;
1853
1854 # hint : the key is popularity.title because we can have
1855 # many results with the same popularity. In this cas, sub-ordering is done by title
1856 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1857 # (un-frequent, I agree, but we won't forget anything that way ;-)
1858             $popularity{ sprintf( "%10d", $popularity ) . $title
1859                   . $biblionumber } = $biblionumber;
1860         }
1861
1862     # sort the hash and return the same structure as GetRecords (Zebra querying)
1863         my $result_hash;
1864         my $numbers = 0;
1865         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1866             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1867                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1868                   $result{ $popularity{$key} }->as_usmarc();
1869             }
1870         }
1871         else {                                    # sort popularity ASC
1872             foreach my $key ( sort ( keys %popularity ) ) {
1873                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1874                   $result{ $popularity{$key} }->as_usmarc();
1875             }
1876         }
1877         my $finalresult = ();
1878         $result_hash->{'hits'}         = $numbers;
1879         $finalresult->{'biblioserver'} = $result_hash;
1880         return $finalresult;
1881
1882         #
1883         # ORDER BY author
1884         #
1885     }
1886     elsif ( $ordering =~ /author/ ) {
1887         my %result;
1888         foreach ( split /;/, $biblionumbers ) {
1889             my ( $biblionumber, $title ) = split /,/, $_;
1890             my $record = GetMarcBiblio($biblionumber);
1891             my $author;
1892             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1893                 $author = $record->subfield( '200', 'f' );
1894                 $author = $record->subfield( '700', 'a' ) unless $author;
1895             }
1896             else {
1897                 $author = $record->subfield( '100', 'a' );
1898             }
1899
1900 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1901 # and we don't want to get only 1 result for each of them !!!
1902             $result{ $author . $biblionumber } = $record;
1903         }
1904
1905     # sort the hash and return the same structure as GetRecords (Zebra querying)
1906         my $result_hash;
1907         my $numbers = 0;
1908         if ( $ordering eq 'author_za' ) {    # sort by author desc
1909             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1910                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1911                   $result{$key}->as_usmarc();
1912             }
1913         }
1914         else {                               # sort by author ASC
1915             foreach my $key ( sort ( keys %result ) ) {
1916                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1917                   $result{$key}->as_usmarc();
1918             }
1919         }
1920         my $finalresult = ();
1921         $result_hash->{'hits'}         = $numbers;
1922         $finalresult->{'biblioserver'} = $result_hash;
1923         return $finalresult;
1924
1925         #
1926         # ORDER BY callnumber
1927         #
1928     }
1929     elsif ( $ordering =~ /callnumber/ ) {
1930         my %result;
1931         foreach ( split /;/, $biblionumbers ) {
1932             my ( $biblionumber, $title ) = split /,/, $_;
1933             my $record = GetMarcBiblio($biblionumber);
1934             my $callnumber;
1935             my ( $callnumber_tag, $callnumber_subfield ) =
1936               GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1937             ( $callnumber_tag, $callnumber_subfield ) =
1938               GetMarcFromKohaField('biblioitems.callnumber')
1939               unless $callnumber_tag;
1940             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1941                 $callnumber = $record->subfield( '200', 'f' );
1942             }
1943             else {
1944                 $callnumber = $record->subfield( '100', 'a' );
1945             }
1946
1947 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1948 # and we don't want to get only 1 result for each of them !!!
1949             $result{ $callnumber . $biblionumber } = $record;
1950         }
1951
1952     # sort the hash and return the same structure as GetRecords (Zebra querying)
1953         my $result_hash;
1954         my $numbers = 0;
1955         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1956             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1957                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1958                   $result{$key}->as_usmarc();
1959             }
1960         }
1961         else {                                     # sort by title ASC
1962             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1963                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1964                   $result{$key}->as_usmarc();
1965             }
1966         }
1967         my $finalresult = ();
1968         $result_hash->{'hits'}         = $numbers;
1969         $finalresult->{'biblioserver'} = $result_hash;
1970         return $finalresult;
1971     }
1972     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1973         my %result;
1974         foreach ( split /;/, $biblionumbers ) {
1975             my ( $biblionumber, $title ) = split /,/, $_;
1976             my $record = GetMarcBiblio($biblionumber);
1977             my ( $publicationyear_tag, $publicationyear_subfield ) =
1978               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1979             my $publicationyear =
1980               $record->subfield( $publicationyear_tag,
1981                 $publicationyear_subfield );
1982
1983 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1984 # and we don't want to get only 1 result for each of them !!!
1985             $result{ $publicationyear . $biblionumber } = $record;
1986         }
1987
1988     # sort the hash and return the same structure as GetRecords (Zebra querying)
1989         my $result_hash;
1990         my $numbers = 0;
1991         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
1992             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1993                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1994                   $result{$key}->as_usmarc();
1995             }
1996         }
1997         else {                                 # sort by pub year ASC
1998             foreach my $key ( sort ( keys %result ) ) {
1999                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2000                   $result{$key}->as_usmarc();
2001             }
2002         }
2003         my $finalresult = ();
2004         $result_hash->{'hits'}         = $numbers;
2005         $finalresult->{'biblioserver'} = $result_hash;
2006         return $finalresult;
2007
2008         #
2009         # ORDER BY title
2010         #
2011     }
2012     elsif ( $ordering =~ /title/ ) {
2013
2014 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2015         my %result;
2016         foreach ( split /;/, $biblionumbers ) {
2017             my ( $biblionumber, $title ) = split /,/, $_;
2018
2019 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2020 # and we don't want to get only 1 result for each of them !!!
2021 # hint & speed improvement : we can order without reading the record
2022 # so order, and read records only for the requested page !
2023             $result{ $title . $biblionumber } = $biblionumber;
2024         }
2025
2026     # sort the hash and return the same structure as GetRecords (Zebra querying)
2027         my $result_hash;
2028         my $numbers = 0;
2029         if ( $ordering eq 'title_az' ) {    # sort by title desc
2030             foreach my $key ( sort ( keys %result ) ) {
2031                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2032             }
2033         }
2034         else {                              # sort by title ASC
2035             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2036                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2037             }
2038         }
2039
2040         # limit the $results_per_page to result size if it's more
2041         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2042
2043         # for the requested page, replace biblionumber by the complete record
2044         # speed improvement : avoid reading too much things
2045         for (
2046             my $counter = $offset ;
2047             $counter <= $offset + $results_per_page ;
2048             $counter++
2049           )
2050         {
2051             $result_hash->{'RECORDS'}[$counter] =
2052               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2053         }
2054         my $finalresult = ();
2055         $result_hash->{'hits'}         = $numbers;
2056         $finalresult->{'biblioserver'} = $result_hash;
2057         return $finalresult;
2058     }
2059     else {
2060
2061 #
2062 # order by ranking
2063 #
2064 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2065         my %result;
2066         my %count_ranking;
2067         foreach ( split /;/, $biblionumbers ) {
2068             my ( $biblionumber, $title ) = split /,/, $_;
2069             $title =~ /(.*)-(\d)/;
2070
2071             # get weight
2072             my $ranking = $2;
2073
2074 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2075 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2076 # biblio N has ranking = 6
2077             $count_ranking{$biblionumber} += $ranking;
2078         }
2079
2080 # build the result by "inverting" the count_ranking hash
2081 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2082 #         warn "counting";
2083         foreach ( keys %count_ranking ) {
2084             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2085         }
2086
2087     # sort the hash and return the same structure as GetRecords (Zebra querying)
2088         my $result_hash;
2089         my $numbers = 0;
2090         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2091             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2092         }
2093
2094         # limit the $results_per_page to result size if it's more
2095         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2096
2097         # for the requested page, replace biblionumber by the complete record
2098         # speed improvement : avoid reading too much things
2099         for (
2100             my $counter = $offset ;
2101             $counter <= $offset + $results_per_page ;
2102             $counter++
2103           )
2104         {
2105             $result_hash->{'RECORDS'}[$counter] =
2106               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2107               if $result_hash->{'RECORDS'}[$counter];
2108         }
2109         my $finalresult = ();
2110         $result_hash->{'hits'}         = $numbers;
2111         $finalresult->{'biblioserver'} = $result_hash;
2112         return $finalresult;
2113     }
2114 }
2115
2116 =head2 ModBiblios
2117
2118 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2119
2120 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2121 test parameter if set donot perform change to records in database.
2122
2123 =over 2
2124
2125 =item C<input arg:>
2126
2127     * $listbiblios is an array ref to marcrecords to be changed
2128     * $tagsubfield is the reference of the subfield to change.
2129     * $initvalue is the value to search the record for
2130     * $targetvalue is the value to set the subfield to
2131     * $test is to be set only not to perform changes in database.
2132
2133 =item C<Output arg:>
2134     * $countchanged counts all the changes performed.
2135     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2136
2137 =item C<usage in the script:>
2138
2139 =back
2140
2141 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2142 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2143 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2144
2145 =cut
2146
2147 sub ModBiblios {
2148     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2149     my $countmatched;
2150     my @unmatched;
2151     my ( $tag, $subfield ) = ( $1, $2 )
2152       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2153     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2154         $tag = $tag . $subfield;
2155         undef $subfield;
2156     }
2157     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
2158     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2159     if ($tag eq $itemtag) {
2160         # do not allow the embedded item tag to be
2161         # edited from here
2162         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2163         return (0, []);
2164     }
2165     foreach my $usmarc (@$listbiblios) {
2166         my $record;
2167         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2168         my $biblionumber;
2169         if ($@) {
2170
2171             # usmarc is not a valid usmarc May be a biblionumber
2172             # FIXME - sorry, please let's figure out whether
2173             #         this function is to be passed a list of
2174             #         record numbers or a list of MARC::Record
2175             #         objects.  The former is probably better
2176             #         because the MARC records supplied by Zebra
2177             #         may be not current.
2178             $record       = GetMarcBiblio($usmarc);
2179             $biblionumber = $usmarc;
2180         }
2181         else {
2182             if ( $bntag >= 010 ) {
2183                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2184             }
2185             else {
2186                 $biblionumber = $record->field($bntag)->data;
2187             }
2188         }
2189
2190         #GetBiblionumber is to be written.
2191         #Could be replaced by TransformMarcToKoha (But Would be longer)
2192         if ( $record->field($tag) ) {
2193             my $modify = 0;
2194             foreach my $field ( $record->field($tag) ) {
2195                 if ($subfield) {
2196                     if (
2197                         $field->delete_subfield(
2198                             'code'  => $subfield,
2199                             'match' => qr($initvalue)
2200                         )
2201                       )
2202                     {
2203                         $countmatched++;
2204                         $modify = 1;
2205                         $field->update( $subfield, $targetvalue )
2206                           if ($targetvalue);
2207                     }
2208                 }
2209                 else {
2210                     if ( $tag >= 010 ) {
2211                         if ( $field->delete_field($field) ) {
2212                             $countmatched++;
2213                             $modify = 1;
2214                         }
2215                     }
2216                     else {
2217                         $field->data = $targetvalue
2218                           if ( $field->data =~ qr($initvalue) );
2219                     }
2220                 }
2221             }
2222
2223             #       warn $record->as_formatted;
2224             if ($modify) {
2225                 ModBiblio( $record, $biblionumber,
2226                     GetFrameworkCode($biblionumber) )
2227                   unless ($test);
2228             }
2229             else {
2230                 push @unmatched, $biblionumber;
2231             }
2232         }
2233         else {
2234             push @unmatched, $biblionumber;
2235         }
2236     }
2237     return ( $countmatched, \@unmatched );
2238 }
2239
2240 END { }    # module clean-up code here (global destructor)
2241
2242 1;
2243 __END__
2244
2245 =head1 AUTHOR
2246
2247 Koha Developement team <info@koha.org>
2248
2249 =cut