C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 require Exporter;
  20 use C4::Context;
  21 use C4::Biblio;    # GetMarcFromKohaField
  22 use C4::Koha;      # getFacets
  23 use Lingua::Stem;
  24 use C4::Dates qw(format_date);
  25
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  27
  28 # set the version for version checking
  29 BEGIN {
  30     $VERSION = 3.01;
  31     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  32 }
  33
  34 =head1 NAME
  35
  36 C4::Search - Functions for searching the Koha catalog.
  37
  38 =head1 SYNOPSIS
  39
  40 See opac/opac-search.pl or catalogue/search.pl for example of usage
  41
  42 =head1 DESCRIPTION
  43
  44 This module provides searching functions for Koha's bibliographic databases
  45
  46 =head1 FUNCTIONS
  47
  48 =cut
  49
  50 @ISA    = qw(Exporter);
  51 @EXPORT = qw(
  52   &findseealso
  53   &FindDuplicate
  54   &SimpleSearch
  55   &searchResults
  56   &getRecords
  57   &buildQuery
  58   &NZgetRecords
  59   &ModBiblios
  60 );
  61
  62 # make all your functions, whether exported or not;
  63
  64 =head2 findseealso($dbh,$fields);
  65
  66 C<$dbh> is a link to the DB handler.
  67
  68 use C4::Context;
  69 my $dbh =C4::Context->dbh;
  70
  71 C<$fields> is a reference to the fields array
  72
  73 This function modifies the @$fields array and adds related fields to search on.
  74
  75 FIXME: this function is probably deprecated in Koha 3
  76
  77 =cut
  78
  79 sub findseealso {
  80     my ( $dbh, $fields ) = @_;
  81     my $tagslib = GetMarcStructure(1);
  82     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  83         my ($tag)      = substr( @$fields[$i], 1, 3 );
  84         my ($subfield) = substr( @$fields[$i], 4, 1 );
  85         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  86           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  87     }
  88 }
  89
  90 =head2 FindDuplicate
  91
  92 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  93
  94 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  95
  96 =cut
  97
  98 sub FindDuplicate {
  99     my ($record) = @_;
 100     my $dbh = C4::Context->dbh;
 101     my $result = TransformMarcToKoha( $dbh, $record, '' );
 102     my $sth;
 103     my $query;
 104     my $search;
 105     my $type;
 106     my ( $biblionumber, $title );
 107
 108     # search duplicate on ISBN, easy and fast..
 109     # ... normalize first
 110     if ( $result->{isbn} ) {
 111         $result->{isbn} =~ s/\(.*$//;
 112         $result->{isbn} =~ s/\s+$//;
 113         $query = "isbn=$result->{isbn}";
 114     }
 115     else {
 116         $result->{title} =~ s /\\//g;
 117         $result->{title} =~ s /\"//g;
 118         $result->{title} =~ s /\(//g;
 119         $result->{title} =~ s /\)//g;
 120
 121         # FIXME: instead of removing operators, could just do
 122         # quotes around the value
 123         $result->{title} =~ s/(and|or|not)//g;
 124         $query = "ti,ext=$result->{title}";
 125         $query .= " and itemtype=$result->{itemtype}"
 126           if ( $result->{itemtype} );
 127         if   ( $result->{author} ) {
 128             $result->{author} =~ s /\\//g;
 129             $result->{author} =~ s /\"//g;
 130             $result->{author} =~ s /\(//g;
 131             $result->{author} =~ s /\)//g;
 132
 133             # remove valid operators
 134             $result->{author} =~ s/(and|or|not)//g;
 135             $query .= " and au,ext=$result->{author}";
 136         }
 137     }
 138
 139     # FIXME: add error handling
 140     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 141     my @results;
 142     foreach my $possible_duplicate_record (@$searchresults) {
 143         my $marcrecord =
 144           MARC::Record->new_from_usmarc($possible_duplicate_record);
 145         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 146
 147         # FIXME :: why 2 $biblionumber ?
 148         if ($result) {
 149             push @results, $result->{'biblionumber'};
 150             push @results, $result->{'title'};
 151         }
 152     }
 153     return @results;
 154 }
 155
 156 =head2 SimpleSearch
 157
 158 ($error,$results) = SimpleSearch($query,@servers);
 159
 160 This function provides a simple search API on the bibliographic catalog
 161
 162 =over 2
 163
 164 =item C<input arg:>
 165
 166     * $query can be a simple keyword or a complete CCL query
 167     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 168
 169 =item C<Output arg:>
 170     * $error is a empty unless an error is detected
 171     * \@results is an array of records.
 172
 173 =item C<usage in the script:>
 174
 175 =back
 176
 177 my ($error, $marcresults) = SimpleSearch($query);
 178
 179 if (defined $error) {
 180     $template->param(query_error => $error);
 181     warn "error: ".$error;
 182     output_html_with_http_headers $input, $cookie, $template->output;
 183     exit;
 184 }
 185
 186 my $hits = scalar @$marcresults;
 187 my @results;
 188
 189 for(my $i=0;$i<$hits;$i++) {
 190     my %resultsloop;
 191     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 192     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 193
 194     #build the hash for the template.
 195     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 196     $resultsloop{title}           = $biblio->{'title'};
 197     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 198     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 199     $resultsloop{author}          = $biblio->{'author'};
 200     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 201     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 202
 203     push @results, \%resultsloop;
 204 }
 205
 206 $template->param(result=>\@results);
 207
 208 =cut
 209
 210 sub SimpleSearch {
 211     my $query = shift;
 212     if ( C4::Context->preference('NoZebra') ) {
 213         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 214         my $search_result =
 215           (      $result->{hits}
 216               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 217         return ( undef, $search_result );
 218     }
 219     else {
 220         my @servers = @_;
 221         my @results;
 222         my @tmpresults;
 223         my @zconns;
 224         return ( "No query entered", undef ) unless $query;
 225
 226         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 227         @servers = ("biblioserver") unless @servers;
 228
 229         # Initialize & Search Zebra
 230         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 231             eval {
 232                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 233                 $tmpresults[$i] =
 234                   $zconns[$i]
 235                   ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) );
 236
 237                 # error handling
 238                 my $error =
 239                     $zconns[$i]->errmsg() . " ("
 240                   . $zconns[$i]->errcode() . ") "
 241                   . $zconns[$i]->addinfo() . " "
 242                   . $zconns[$i]->diagset();
 243
 244                 return ( $error, undef ) if $zconns[$i]->errcode();
 245             };
 246             if ($@) {
 247
 248                 # caught a ZOOM::Exception
 249                 my $error =
 250                     $@->message() . " ("
 251                   . $@->code() . ") "
 252                   . $@->addinfo() . " "
 253                   . $@->diagset();
 254                 warn $error;
 255                 return ( $error, undef );
 256             }
 257         }
 258         my $hits = 0;
 259         my $ev;
 260         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 261             $ev = $zconns[ $i - 1 ]->last_event();
 262             if ( $ev == ZOOM::Event::ZEND ) {
 263                 $hits = $tmpresults[ $i - 1 ]->size();
 264             }
 265             if ( $hits > 0 ) {
 266                 for ( my $j = 0 ; $j < $hits ; $j++ ) {
 267                     my $record = $tmpresults[ $i - 1 ]->record($j)->raw();
 268                     push @results, $record;
 269                 }
 270             }
 271             $hits = 0;
 272         }
 273         return ( undef, \@results );
 274     }
 275 }
 276
 277 =head2 getRecords
 278
 279 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 280
 281         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 282         $results_per_page, $offset,       $expanded_facet, $branches,
 283         $query_type,       $scan
 284     );
 285
 286 The all singing, all dancing, multi-server, asynchronous, scanning,
 287 searching, record nabbing, facet-building
 288
 289 See verbse embedded documentation.
 290
 291 =cut
 292
 293 sub getRecords {
 294     my (
 295         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 296         $results_per_page, $offset,       $expanded_facet, $branches,
 297         $query_type,       $scan
 298     ) = @_;
 299
 300     my @servers = @$servers_ref;
 301     my @sort_by = @$sort_by_ref;
 302
 303     # Initialize variables for the ZOOM connection and results object
 304     my $zconn;
 305     my @zconns;
 306     my @results;
 307     my $results_hashref = ();
 308
 309     # Initialize variables for the faceted results objects
 310     my $facets_counter = ();
 311     my $facets_info    = ();
 312     my $facets         = getFacets();
 313
 314     my @facets_loop
 315       ;    # stores the ref to array of hashes for template facets loop
 316
 317     ### LOOP THROUGH THE SERVERS
 318     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 319         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 320
 321 # perform the search, create the results objects
 322 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 323         my $query_to_use;
 324         if ( $servers[$i] =~ /biblioserver/ ) {
 325             $query_to_use = $koha_query;
 326         }
 327         else {
 328             $query_to_use = $simple_query;
 329         }
 330
 331         #$query_to_use = $simple_query if $scan;
 332         warn $simple_query if ( $scan and $DEBUG );
 333
 334         # Check if we've got a query_type defined, if so, use it
 335         eval {
 336             if ($query_type)
 337             {
 338                 if ( $query_type =~ /^ccl/ ) {
 339                     $query_to_use =~
 340                       s/\:/\=/g;    # change : to = last minute (FIXME)
 341                     $results[$i] =
 342                       $zconns[$i]->search(
 343                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 344                       );
 345                 }
 346                 elsif ( $query_type =~ /^cql/ ) {
 347                     $results[$i] =
 348                       $zconns[$i]->search(
 349                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 350                 }
 351                 elsif ( $query_type =~ /^pqf/ ) {
 352                     $results[$i] =
 353                       $zconns[$i]->search(
 354                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 355                 }
 356             }
 357             else {
 358                 if ($scan) {
 359                     $results[$i] =
 360                       $zconns[$i]->scan(
 361                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 362                       );
 363                 }
 364                 else {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 368                       );
 369                 }
 370             }
 371         };
 372         if ($@) {
 373             warn "WARNING: query problem with $query_to_use " . $@;
 374         }
 375
 376         # Concatenate the sort_by limits and pass them to the results object
 377         # Note: sort will override rank
 378         my $sort_by;
 379         foreach my $sort (@sort_by) {
 380             if ( $sort eq "author_az" ) {
 381                 $sort_by .= "1=1003 <i ";
 382             }
 383             elsif ( $sort eq "author_za" ) {
 384                 $sort_by .= "1=1003 >i ";
 385             }
 386             elsif ( $sort eq "popularity_asc" ) {
 387                 $sort_by .= "1=9003 <i ";
 388             }
 389             elsif ( $sort eq "popularity_dsc" ) {
 390                 $sort_by .= "1=9003 >i ";
 391             }
 392             elsif ( $sort eq "call_number_asc" ) {
 393                 $sort_by .= "1=20  <i ";
 394             }
 395             elsif ( $sort eq "call_number_dsc" ) {
 396                 $sort_by .= "1=20 >i ";
 397             }
 398             elsif ( $sort eq "pubdate_asc" ) {
 399                 $sort_by .= "1=31 <i ";
 400             }
 401             elsif ( $sort eq "pubdate_dsc" ) {
 402                 $sort_by .= "1=31 >i ";
 403             }
 404             elsif ( $sort eq "acqdate_asc" ) {
 405                 $sort_by .= "1=32 <i ";
 406             }
 407             elsif ( $sort eq "acqdate_dsc" ) {
 408                 $sort_by .= "1=32 >i ";
 409             }
 410             elsif ( $sort eq "title_az" ) {
 411                 $sort_by .= "1=4 <i ";
 412             }
 413             elsif ( $sort eq "title_za" ) {
 414                 $sort_by .= "1=4 >i ";
 415             }
 416         }
 417         if ($sort_by) {
 418             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 419                 warn "WARNING sort $sort_by failed";
 420             }
 421         }
 422     }    # finished looping through servers
 423
 424     # The big moment: asynchronously retrieve results from all servers
 425     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 426         my $ev = $zconns[ $i - 1 ]->last_event();
 427         if ( $ev == ZOOM::Event::ZEND ) {
 428             next unless $results[ $i - 1 ];
 429             my $size = $results[ $i - 1 ]->size();
 430             if ( $size > 0 ) {
 431                 my $results_hash;
 432
 433                 # loop through the results
 434                 $results_hash->{'hits'} = $size;
 435                 my $times;
 436                 if ( $offset + $results_per_page <= $size ) {
 437                     $times = $offset + $results_per_page;
 438                 }
 439                 else {
 440                     $times = $size;
 441                 }
 442                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 443                     my $records_hash;
 444                     my $record;
 445                     my $facet_record;
 446
 447                     ## Check if it's an index scan
 448                     if ($scan) {
 449                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 450
 451                  # here we create a minimal MARC record and hand it off to the
 452                  # template just like a normal result ... perhaps not ideal, but
 453                  # it works for now
 454                         my $tmprecord = MARC::Record->new();
 455                         $tmprecord->encoding('UTF-8');
 456                         my $tmptitle;
 457                         my $tmpauthor;
 458
 459                 # the minimal record in author/title (depending on MARC flavour)
 460                         if ( C4::Context->preference("marcflavour") eq
 461                             "UNIMARC" )
 462                         {
 463                             $tmptitle = MARC::Field->new(
 464                                 '200', ' ', ' ',
 465                                 a => $term,
 466                                 f => $occ
 467                             );
 468                         }
 469                         else {
 470                             $tmptitle =
 471                               MARC::Field->new( '245', ' ', ' ', a => $term, );
 472                             $tmpauthor =
 473                               MARC::Field->new( '100', ' ', ' ', a => $occ, );
 474                         }
 475                         $tmprecord->append_fields($tmptitle);
 476                         $tmprecord->append_fields($tmpauthor);
 477                         $results_hash->{'RECORDS'}[$j] =
 478                           $tmprecord->as_usmarc();
 479                     }
 480
 481                     # not an index scan
 482                     else {
 483                         $record = $results[ $i - 1 ]->record($j)->raw();
 484
 485                         # warn "RECORD $j:".$record;
 486                         $results_hash->{'RECORDS'}[$j] = $record;
 487
 488             # Fill the facets while we're looping, but only for the biblioserver
 489                         $facet_record = MARC::Record->new_from_usmarc($record)
 490                           if $servers[ $i - 1 ] =~ /biblioserver/;
 491
 492                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 493                         if ($facet_record) {
 494                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 495
 496                                 if ( $facets->[$k] ) {
 497                                     my @fields;
 498                                     for my $tag ( @{ $facets->[$k]->{'tags'} } )
 499                                     {
 500                                         push @fields,
 501                                           $facet_record->field($tag);
 502                                     }
 503                                     for my $field (@fields) {
 504                                         my @subfields = $field->subfields();
 505                                         for my $subfield (@subfields) {
 506                                             my ( $code, $data ) = @$subfield;
 507                                             if ( $code eq
 508                                                 $facets->[$k]->{'subfield'} )
 509                                             {
 510                                                 $facets_counter->{ $facets->[$k]
 511                                                       ->{'link_value'} }
 512                                                   ->{$data}++;
 513                                             }
 514                                         }
 515                                     }
 516                                     $facets_info->{ $facets->[$k]
 517                                           ->{'link_value'} }->{'label_value'} =
 518                                       $facets->[$k]->{'label_value'};
 519                                     $facets_info->{ $facets->[$k]
 520                                           ->{'link_value'} }->{'expanded'} =
 521                                       $facets->[$k]->{'expanded'};
 522                                 }
 523                             }
 524                         }
 525                     }
 526                 }
 527                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 528             }
 529
 530             # warn "connection ", $i-1, ": $size hits";
 531             # warn $results[$i-1]->record(0)->render() if $size > 0;
 532
 533             # BUILD FACETS
 534             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 535                 for my $link_value (
 536                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 537                     keys %$facets_counter )
 538                 {
 539                     my $expandable;
 540                     my $number_of_facets;
 541                     my @this_facets_array;
 542                     for my $one_facet (
 543                         sort {
 544                             $facets_counter->{$link_value}
 545                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 546                         } keys %{ $facets_counter->{$link_value} }
 547                       )
 548                     {
 549                         $number_of_facets++;
 550                         if (   ( $number_of_facets < 6 )
 551                             || ( $expanded_facet eq $link_value )
 552                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 553                         {
 554
 555                       # Sanitize the link value ), ( will cause errors with CCL,
 556                             my $facet_link_value = $one_facet;
 557                             $facet_link_value =~ s/(\(|\))/ /g;
 558
 559                             # fix the length that will display in the label,
 560                             my $facet_label_value = $one_facet;
 561                             $facet_label_value =
 562                               substr( $one_facet, 0, 20 ) . "..."
 563                               unless length($facet_label_value) <= 20;
 564
 565                             # if it's a branch, label by the name, not the code,
 566                             if ( $link_value =~ /branch/ ) {
 567                                 $facet_label_value =
 568                                   $branches->{$one_facet}->{'branchname'};
 569                             }
 570
 571                 # but we're down with the whole label being in the link's title.
 572                             my $facet_title_value = $one_facet;
 573
 574                             push @this_facets_array,
 575                               (
 576                                 {
 577                                     facet_count =>
 578                                       $facets_counter->{$link_value}
 579                                       ->{$one_facet},
 580                                     facet_label_value => $facet_label_value,
 581                                     facet_title_value => $facet_title_value,
 582                                     facet_link_value  => $facet_link_value,
 583                                     type_link_value   => $link_value,
 584                                 },
 585                               );
 586                         }
 587                     }
 588
 589                     # handle expanded option
 590                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 591                         $expandable = 1
 592                           if ( ( $number_of_facets > 6 )
 593                             && ( $expanded_facet ne $link_value ) );
 594                     }
 595                     push @facets_loop,
 596                       (
 597                         {
 598                             type_link_value => $link_value,
 599                             type_id         => $link_value . "_id",
 600                             type_label =>
 601                               $facets_info->{$link_value}->{'label_value'},
 602                             facets     => \@this_facets_array,
 603                             expandable => $expandable,
 604                             expand     => $link_value,
 605                         }
 606                       );
 607                 }
 608             }
 609         }
 610     }
 611     return ( undef, $results_hashref, \@facets_loop );
 612 }
 613
 614 use C4::Search::PazPar2;
 615 use XML::Simple;
 616 use Data::Dumper;
 617 sub pazGetRecords {
 618     my (
 619         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 620         $results_per_page, $offset,       $expanded_facet, $branches,
 621         $query_type,       $scan
 622     ) = @_;
 623
 624     my $paz = C4::Search::PazPar2->new('http://localhost:10006/search.pz2');
 625     $paz->init();
 626     $paz->search($simple_query);
 627     sleep 1;
 628
 629     # do results
 630     my $results_hashref = {};
 631     my $stats = XMLin($paz->stat);
 632     $results_hashref->{'biblioserver'}->{'hits'} = $stats->{'hits'};
 633     my $results = XMLin($paz->show($offset, $results_per_page), forcearray => 1);
 634     #die Dumper($results);
 635     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 636         warn "hit";
 637         my $recid = $hit->{recid}->[0];
 638         #if ($recid =~ /[\200-\377]/) {
 639         if ($recid =~ /sodot/) {
 640             #die "bad $recid\n";
 641             #probably do not want non-ASCII in record ID
 642             last HIT;
 643         }
 644         my $count = 1;
 645         if (exists $hit->{count}) {
 646             $count = $hit->{count}->[0];
 647         }
 648         #die $count;
 649         for (my $i = 0; $i < $count; $i++) {
 650             warn "look for $recid offset = $i";
 651             my $rec = $paz->record($recid, $i);
 652             warn "got record $i";
 653             push @{ $results_hashref->{'biblioserver'}->{'RECORDS'} }, $paz->record($recid, $i);
 654         }
 655     }
 656     warn "past hits";
 657
 658     # pass through facets
 659     my $termlist_xml = $paz->termlist('author,subject');
 660     my $terms = XMLin($termlist_xml, forcearray => 1);
 661     my @facets_loop = ();
 662     foreach my $list (sort keys %{ $terms->{'list'} }) {
 663         my @facets = ();
 664         foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 665             push @facets, {
 666                 facet_label_value => $facet->{'name'}->[0],
 667             };
 668         }
 669         push @facets_loop, ( {
 670             type_label => $list,
 671             facets => \@facets,
 672         } );
 673     }
 674
 675     return ( undef, $results_hashref, \@facets_loop );
 676 }
 677
 678 # STOPWORDS
 679 sub _remove_stopwords {
 680     my ( $operand, $index ) = @_;
 681     my @stopwords_removed;
 682
 683     # phrase and exact-qualified indexes shouldn't have stopwords removed
 684     if ( $index !~ m/phr|ext/ ) {
 685
 686 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 687 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 688 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 689 #       is a stopword, we'd get "çon" and wouldn't find anything...
 690         foreach ( keys %{ C4::Context->stopwords } ) {
 691             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 692             if ( $operand =~
 693                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ )
 694             {
 695                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 696                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 697                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 698                 push @stopwords_removed, $_;
 699             }
 700         }
 701     }
 702     return ( $operand, \@stopwords_removed );
 703 }
 704
 705 # TRUNCATION
 706 sub _detect_truncation {
 707     my ( $operand, $index ) = @_;
 708     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 709         @regexpr );
 710     $operand =~ s/^ //g;
 711     my @wordlist = split( /\s/, $operand );
 712     foreach my $word (@wordlist) {
 713         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 714             push @rightlefttruncated, $word;
 715         }
 716         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 717             push @lefttruncated, $word;
 718         }
 719         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 720             push @righttruncated, $word;
 721         }
 722         elsif ( index( $word, "*" ) < 0 ) {
 723             push @nontruncated, $word;
 724         }
 725         else {
 726             push @regexpr, $word;
 727         }
 728     }
 729     return (
 730         \@nontruncated,       \@righttruncated, \@lefttruncated,
 731         \@rightlefttruncated, \@regexpr
 732     );
 733 }
 734
 735 # STEMMING
 736 sub _build_stemmed_operand {
 737     my ($operand) = @_;
 738     my $stemmed_operand;
 739
 740 # FIXME: the locale should be set based on the user's language and/or search choice
 741     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 742
 743 # FIXME: these should be stored in the db so the librarian can modify the behavior
 744     $stemmer->add_exceptions(
 745         {
 746             'and' => 'and',
 747             'or'  => 'or',
 748             'not' => 'not',
 749         }
 750     );
 751     my @words = split( / /, $operand );
 752     my $stems = $stemmer->stem(@words);
 753     for my $stem (@$stems) {
 754         $stemmed_operand .= "$stem";
 755         $stemmed_operand .= "?"
 756           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 757         $stemmed_operand .= " ";
 758     }
 759     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 760     return $stemmed_operand;
 761 }
 762
 763 # FIELD WEIGHTING
 764 sub _build_weighted_query {
 765
 766 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 767 # pretty well but could work much better if we had a smarter query parser
 768     my ( $operand, $stemmed_operand, $index ) = @_;
 769     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 770     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 771     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 772
 773     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 774
 775     # Keyword, or, no index specified
 776     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 777         $weighted_query .=
 778           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 779         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 780         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 781           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 782           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 783         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 784           if $fuzzy_enabled;    # add fuzzy, word list
 785         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 786           if ( $stemming and $stemmed_operand )
 787           ;                     # add stemming, right truncation
 788         $weighted_query .= " or wrdl,r9=\"$operand\"";
 789
 790         # embedded sorting: 0 a-z; 1 z-a
 791         # $weighted_query .= ") or (sort1,aut=1";
 792     }
 793
 794     # Barcode searches should skip this process
 795     elsif ( $index eq 'bc' ) {
 796         $weighted_query .= "bc=\"$operand\"";
 797     }
 798
 799     # Authority-number searches should skip this process
 800     elsif ( $index eq 'an' ) {
 801         $weighted_query .= "an=\"$operand\"";
 802     }
 803
 804     # If the index already has more than one qualifier, wrap the operand
 805     # in quotes and pass it back (assumption is that the user knows what they
 806     # are doing and won't appreciate us mucking up their query
 807     elsif ( $index =~ ',' ) {
 808         $weighted_query .= " $index=\"$operand\"";
 809     }
 810
 811     #TODO: build better cases based on specific search indexes
 812     else {
 813         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 814           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 815         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 816         $weighted_query .=
 817           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 818     }
 819
 820     $weighted_query .= "))";                       # close rank specification
 821     return $weighted_query;
 822 }
 823
 824 =head2 buildQuery
 825
 826 ( $error, $query,
 827 $simple_query, $query_cgi,
 828 $query_desc, $limit,
 829 $limit_cgi, $limit_desc,
 830 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 831
 832 Build queries and limits in CCL, CGI, Human,
 833 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 834
 835 See verbose embedded documentation.
 836
 837
 838 =cut
 839
 840 sub buildQuery {
 841     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 842
 843     warn "---------"        if $DEBUG;
 844     warn "Enter buildQuery" if $DEBUG;
 845     warn "---------"        if $DEBUG;
 846
 847     # dereference
 848     my @operators = @$operators if $operators;
 849     my @indexes   = @$indexes   if $indexes;
 850     my @operands  = @$operands  if $operands;
 851     my @limits    = @$limits    if $limits;
 852     my @sort_by   = @$sort_by   if $sort_by;
 853
 854     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 855     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 856     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 857     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 858     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 859
 860     # no stemming/weight/fuzzy in NoZebra
 861     if ( C4::Context->preference("NoZebra") ) {
 862         $stemming      = 0;
 863         $weight_fields = 0;
 864         $fuzzy_enabled = 0;
 865     }
 866
 867     my $query        = $operands[0];
 868     my $simple_query = $operands[0];
 869
 870     # initialize the variables we're passing back
 871     my $query_cgi;
 872     my $query_desc;
 873     my $query_type;
 874
 875     my $limit;
 876     my $limit_cgi;
 877     my $limit_desc;
 878
 879     my $stopwords_removed;    # flag to determine if stopwords have been removed
 880
 881 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 882 # DIAGNOSTIC ONLY!!
 883     if ( $query =~ /^ccl=/ ) {
 884         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 885     }
 886     if ( $query =~ /^cql=/ ) {
 887         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 888     }
 889     if ( $query =~ /^pqf=/ ) {
 890         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 891     }
 892
 893     # pass nested queries directly
 894     # FIXME: need better handling of some of these variables in this case
 895     if ( $query =~ /(\(|\))/ ) {
 896         return (
 897             undef,              $query, $simple_query, $query_cgi,
 898             $query,             $limit, $limit_cgi,    $limit_desc,
 899             $stopwords_removed, 'ccl'
 900         );
 901     }
 902
 903 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 904 # query operands and indexes and add stemming, truncation, field weighting, etc.
 905 # Once we do so, we'll end up with a value in $query, just like if we had an
 906 # incoming $query from the user
 907     else {
 908         $query = ""
 909           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 910         my $previous_operand
 911           ;    # a flag used to keep track if there was a previous query
 912                # if there was, we can apply the current operator
 913                # for every operand
 914         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 915
 916             # COMBINE OPERANDS, INDEXES AND OPERATORS
 917             if ( $operands[$i] ) {
 918
 919               # A flag to determine whether or not to add the index to the query
 920                 my $indexes_set;
 921
 922 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 923                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 924                     $weight_fields    = 0;
 925                     $stemming         = 0;
 926                     $remove_stopwords = 0;
 927                 }
 928                 my $operand = $operands[$i];
 929                 my $index   = $indexes[$i];
 930
 931                 # Add index-specific attributes
 932                 # Date of Publication
 933                 if ( $index eq 'yr' ) {
 934                     $index .= ",st-numeric";
 935                     $indexes_set++;
 936                     (
 937                         $stemming,      $auto_truncation,
 938                         $weight_fields, $fuzzy_enabled,
 939                         $remove_stopwords
 940                     ) = ( 0, 0, 0, 0, 0 );
 941                 }
 942
 943                 # Date of Acquisition
 944                 elsif ( $index eq 'acqdate' ) {
 945                     $index .= ",st-date-normalized";
 946                     $indexes_set++;
 947                     (
 948                         $stemming,      $auto_truncation,
 949                         $weight_fields, $fuzzy_enabled,
 950                         $remove_stopwords
 951                     ) = ( 0, 0, 0, 0, 0 );
 952                 }
 953
 954                 # Set default structure attribute (word list)
 955                 my $struct_attr;
 956                 unless ( !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 957                     $struct_attr = ",wrdl";
 958                 }
 959
 960                 # Some helpful index variants
 961                 my $index_plus       = $index . $struct_attr . ":" if $index;
 962                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 963
 964                 # Remove Stopwords
 965                 if ($remove_stopwords) {
 966                     ( $operand, $stopwords_removed ) =
 967                       _remove_stopwords( $operand, $index );
 968                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 969                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 970                       if ( $stopwords_removed && $DEBUG );
 971                 }
 972
 973                 # Detect Truncation
 974                 my ( $nontruncated, $righttruncated, $lefttruncated,
 975                     $rightlefttruncated, $regexpr );
 976                 my $truncated_operand;
 977                 (
 978                     $nontruncated, $righttruncated, $lefttruncated,
 979                     $rightlefttruncated, $regexpr
 980                 ) = _detect_truncation( $operand, $index );
 981                 warn
 982 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 983                   if $DEBUG;
 984
 985                 # Apply Truncation
 986                 if (
 987                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 988                     scalar(@$rightlefttruncated) > 0 )
 989                 {
 990
 991                # Don't field weight or add the index to the query, we do it here
 992                     $indexes_set = 1;
 993                     undef $weight_fields;
 994                     my $previous_truncation_operand;
 995                     if ( scalar(@$nontruncated) > 0 ) {
 996                         $truncated_operand .= "$index_plus @$nontruncated ";
 997                         $previous_truncation_operand = 1;
 998                     }
 999                     if ( scalar(@$righttruncated) > 0 ) {
1000                         $truncated_operand .= "and "
1001                           if $previous_truncation_operand;
1002                         $truncated_operand .=
1003                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1004                         $previous_truncation_operand = 1;
1005                     }
1006                     if ( scalar(@$lefttruncated) > 0 ) {
1007                         $truncated_operand .= "and "
1008                           if $previous_truncation_operand;
1009                         $truncated_operand .=
1010                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1011                         $previous_truncation_operand = 1;
1012                     }
1013                     if ( scalar(@$rightlefttruncated) > 0 ) {
1014                         $truncated_operand .= "and "
1015                           if $previous_truncation_operand;
1016                         $truncated_operand .=
1017                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1018                         $previous_truncation_operand = 1;
1019                     }
1020                 }
1021                 $operand = $truncated_operand if $truncated_operand;
1022                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1023
1024                 # Handle Stemming
1025                 my $stemmed_operand;
1026                 $stemmed_operand = _build_stemmed_operand($operand)
1027                   if $stemming;
1028                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1029
1030                 # Handle Field Weighting
1031                 my $weighted_operand;
1032                 $weighted_operand =
1033                   _build_weighted_query( $operand, $stemmed_operand, $index )
1034                   if $weight_fields;
1035                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1036                 $operand = $weighted_operand if $weight_fields;
1037                 $indexes_set = 1 if $weight_fields;
1038
1039                 # If there's a previous operand, we need to add an operator
1040                 if ($previous_operand) {
1041
1042                     # User-specified operator
1043                     if ( $operators[ $i - 1 ] ) {
1044                         $query     .= " $operators[$i-1] ";
1045                         $query     .= " $index_plus " unless $indexes_set;
1046                         $query     .= " $operand";
1047                         $query_cgi .= "&op=$operators[$i-1]";
1048                         $query_cgi .= "&idx=$index" if $index;
1049                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1050                         $query_desc .=
1051                           " $operators[$i-1] $index_plus $operands[$i]";
1052                     }
1053
1054                     # Default operator is and
1055                     else {
1056                         $query      .= " and ";
1057                         $query      .= "$index_plus " unless $indexes_set;
1058                         $query      .= "$operand";
1059                         $query_cgi  .= "&op=and&idx=$index" if $index;
1060                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1061                         $query_desc .= " and $index_plus $operands[$i]";
1062                     }
1063                 }
1064
1065                 # There isn't a pervious operand, don't need an operator
1066                 else {
1067
1068                     # Field-weighted queries already have indexes set
1069                     $query .= " $index_plus " unless $indexes_set;
1070                     $query .= $operand;
1071                     $query_desc .= " $index_plus $operands[$i]";
1072                     $query_cgi  .= "&idx=$index" if $index;
1073                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1074                     $previous_operand = 1;
1075                 }
1076             }    #/if $operands
1077         }    # /for
1078     }
1079     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1080
1081     # add limits
1082     my $group_OR_limits;
1083     my $availability_limit;
1084     foreach my $this_limit (@limits) {
1085         if ( $this_limit =~ /available/ ) {
1086
1087 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1088 # In English:
1089 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1090             $availability_limit .=
1091 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1092             $limit_cgi  .= "&limit=available";
1093             $limit_desc .= "";
1094         }
1095
1096         # group_OR_limits, prefixed by mc-
1097         # OR every member of the group
1098         elsif ( $this_limit =~ /mc/ ) {
1099             $group_OR_limits .= " or " if $group_OR_limits;
1100             $limit_desc      .= " or " if $group_OR_limits;
1101             $group_OR_limits .= "$this_limit";
1102             $limit_cgi       .= "&limit=$this_limit";
1103             $limit_desc      .= " $this_limit";
1104         }
1105
1106         # Regular old limits
1107         else {
1108             $limit .= " and " if $limit || $query;
1109             $limit      .= "$this_limit";
1110             $limit_cgi  .= "&limit=$this_limit";
1111             $limit_desc .= " $this_limit";
1112         }
1113     }
1114     if ($group_OR_limits) {
1115         $limit .= " and " if ( $query || $limit );
1116         $limit .= "($group_OR_limits)";
1117     }
1118     if ($availability_limit) {
1119         $limit .= " and " if ( $query || $limit );
1120         $limit .= "($availability_limit)";
1121     }
1122
1123     # Normalize the query and limit strings
1124     $query =~ s/:/=/g;
1125     $limit =~ s/:/=/g;
1126     for ( $query, $query_desc, $limit, $limit_desc ) {
1127         $_ =~ s/  / /g;    # remove extra spaces
1128         $_ =~ s/^ //g;     # remove any beginning spaces
1129         $_ =~ s/ $//g;     # remove any ending spaces
1130         $_ =~ s/==/=/g;    # remove double == from query
1131     }
1132     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1133
1134     for ($query_cgi,$simple_query) {
1135         $_ =~ s/"//g;
1136     }
1137     # append the limit to the query
1138     $query .= " " . $limit;
1139
1140     # Warnings if DEBUG
1141     if ($DEBUG) {
1142         warn "QUERY:" . $query;
1143         warn "QUERY CGI:" . $query_cgi;
1144         warn "QUERY DESC:" . $query_desc;
1145         warn "LIMIT:" . $limit;
1146         warn "LIMIT CGI:" . $limit_cgi;
1147         warn "LIMIT DESC:" . $limit_desc;
1148         warn "---------";
1149         warn "Leave buildQuery";
1150         warn "---------";
1151     }
1152     return (
1153         undef,              $query, $simple_query, $query_cgi,
1154         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1155         $stopwords_removed, $query_type
1156     );
1157 }
1158
1159 =head2 searchResults
1160
1161 Format results in a form suitable for passing to the template
1162
1163 =cut
1164
1165 # IMO this subroutine is pretty messy still -- it's responsible for
1166 # building the HTML output for the template
1167 sub searchResults {
1168     my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_;
1169     my $dbh = C4::Context->dbh;
1170     my $toggle;
1171     my $even = 1;
1172     my @newresults;
1173
1174     # add search-term highlighting via <span>s on the search terms
1175     my $span_terms_hashref;
1176     for my $span_term ( split( / /, $searchdesc ) ) {
1177         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1178         $span_terms_hashref->{$span_term}++;
1179     }
1180
1181     #Build branchnames hash
1182     #find branchname
1183     #get branch information.....
1184     my %branches;
1185     my $bsth =
1186       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1187       ;    # FIXME : use C4::Koha::GetBranches
1188     $bsth->execute();
1189     while ( my $bdata = $bsth->fetchrow_hashref ) {
1190         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1191     }
1192     my %locations;
1193     my $lsch =
1194       $dbh->prepare(
1195 "SELECT authorised_value,lib FROM authorised_values WHERE category = 'LOC'"
1196       );
1197     $lsch->execute();
1198     while ( my $ldata = $lsch->fetchrow_hashref ) {
1199         $locations{ $ldata->{'authorised_value'} } = $ldata->{'lib'};
1200     }
1201
1202     #Build itemtype hash
1203     #find itemtype & itemtype image
1204     my %itemtypes;
1205     $bsth =
1206       $dbh->prepare(
1207         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1208       );
1209     $bsth->execute();
1210     while ( my $bdata = $bsth->fetchrow_hashref ) {
1211         $itemtypes{ $bdata->{'itemtype'} }->{description} =
1212           $bdata->{'description'};
1213         $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'};
1214         $itemtypes{ $bdata->{'itemtype'} }->{summary}  = $bdata->{'summary'};
1215         $itemtypes{ $bdata->{'itemtype'} }->{notforloan} =
1216           $bdata->{'notforloan'};
1217     }
1218
1219     #search item field code
1220     my $sth =
1221       $dbh->prepare(
1222 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1223       );
1224     $sth->execute;
1225     my ($itemtag) = $sth->fetchrow;
1226
1227     # get notforloan authorised value list
1228     $sth =
1229       $dbh->prepare(
1230 "SELECT authorised_value FROM `marc_subfield_structure` WHERE kohafield = 'items.notforloan' AND frameworkcode=''"
1231       );
1232     $sth->execute;
1233     my ($notforloan_authorised_value) = $sth->fetchrow;
1234
1235     ## find column names of items related to MARC
1236     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1237     $sth2->execute;
1238     my %subfieldstosearch;
1239     while ( ( my $column ) = $sth2->fetchrow ) {
1240         my ( $tagfield, $tagsubfield ) =
1241           &GetMarcFromKohaField( "items." . $column, "" );
1242         $subfieldstosearch{$column} = $tagsubfield;
1243     }
1244
1245     # handle which records to actually retrieve
1246     my $times;
1247     if ( $hits && $offset + $results_per_page <= $hits ) {
1248         $times = $offset + $results_per_page;
1249     }
1250     else {
1251         $times = $hits;
1252     }
1253
1254     # loop through all of the records we've retrieved
1255     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1256         my $marcrecord;
1257         $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1258         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1259         $oldbiblio->{result_number} = $i + 1;
1260
1261         # add imageurl to itemtype if there is one
1262         if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) {
1263             $oldbiblio->{imageurl} =
1264               $itemtypes{ $oldbiblio->{itemtype} }->{imageurl};
1265             $oldbiblio->{description} =
1266               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1267         }
1268         else {
1269             $oldbiblio->{imageurl} =
1270               getitemtypeimagesrc() . "/"
1271               . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}
1272               if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1273             $oldbiblio->{description} =
1274               $itemtypes{ $oldbiblio->{itemtype} }->{description};
1275         }
1276
1277  # Build summary if there is one (the summary is defined in the itemtypes table)
1278  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1279         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1280             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1281             my @fields  = $marcrecord->fields();
1282             foreach my $field (@fields) {
1283                 my $tag      = $field->tag();
1284                 my $tagvalue = $field->as_string();
1285                 $summary =~
1286                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1287                 unless ( $tag < 10 ) {
1288                     my @subf = $field->subfields;
1289                     for my $i ( 0 .. $#subf ) {
1290                         my $subfieldcode  = $subf[$i][0];
1291                         my $subfieldvalue = $subf[$i][1];
1292                         my $tagsubf       = $tag . $subfieldcode;
1293                         $summary =~
1294 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1295                     }
1296                 }
1297             }
1298             # FIXME: yuk
1299             $summary =~ s/\[(.*?)]//g;
1300             $summary =~ s/\n/<br>/g;
1301             $oldbiblio->{summary} = $summary;
1302         }
1303
1304 # Add search-term highlighting to the whole record where they match using <span>s
1305         if (C4::Context->preference("OpacHighlightedWords")){
1306             my $searchhighlightblob;
1307             for my $highlight_field ( $marcrecord->fields ) {
1308
1309     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1310                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1311                 for my $subfield ($highlight_field->subfields()) {
1312                     my $match;
1313                     next if $subfield->[0] eq '9';
1314                     my $field = $subfield->[1];
1315                     for my $term ( keys %$span_terms_hashref ) {
1316                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1317                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1318                         $match++;
1319                         }
1320                     }
1321                     $searchhighlightblob .= $field . " ... " if $match;
1322                 }
1323
1324             }
1325             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1326             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1327         }
1328 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1329         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1330
1331         # Add search-term highlighting to the title, subtitle, etc. fields
1332         for my $term ( keys %$span_terms_hashref ) {
1333             my $old_term = $term;
1334             if ( length($term) > 3 ) {
1335                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1336                 $oldbiblio->{'title'} =~
1337                   s/$term/<span class=\"term\">$&<\/span>/gi;
1338                 $oldbiblio->{'subtitle'} =~
1339                   s/$term/<span class=\"term\">$&<\/span>/gi;
1340                 $oldbiblio->{'author'} =~
1341                   s/$term/<span class=\"term\">$&<\/span>/gi;
1342                 $oldbiblio->{'publishercode'} =~
1343                   s/$term/<span class=\"term\">$&<\/span>/gi;
1344                 $oldbiblio->{'place'} =~
1345                   s/$term/<span class=\"term\">$&<\/span>/gi;
1346                 $oldbiblio->{'pages'} =~
1347                   s/$term/<span class=\"term\">$&<\/span>/gi;
1348                 $oldbiblio->{'notes'} =~
1349                   s/$term/<span class=\"term\">$&<\/span>/gi;
1350                 $oldbiblio->{'size'} =~
1351                   s/$term/<span class=\"term\">$&<\/span>/gi;
1352             }
1353         }
1354
1355         # FIXME:
1356         # surely there's a better way to handle this
1357         if ( $i % 2 ) {
1358             $toggle = "#ffffcc";
1359         }
1360         else {
1361             $toggle = "white";
1362         }
1363         $oldbiblio->{'toggle'} = $toggle;
1364
1365         # Pull out the items fields
1366         my @fields = $marcrecord->field($itemtag);
1367
1368         # Setting item statuses for display
1369         my @available_items_loop;
1370         my @onloan_items_loop;
1371         my @other_items_loop;
1372
1373         my $available_items;
1374         my $onloan_items;
1375         my $other_items;
1376
1377         my $ordered_count     = 0;
1378         my $available_count   = 0;
1379         my $onloan_count      = 0;
1380         my $longoverdue_count = 0;
1381         my $other_count       = 0;
1382         my $wthdrawn_count    = 0;
1383         my $itemlost_count    = 0;
1384         my $itembinding_count = 0;
1385         my $itemdamaged_count = 0;
1386         my $can_place_holds   = 0;
1387         my $items_count       = scalar(@fields);
1388         my $items_counter;
1389         my $maxitems =
1390           ( C4::Context->preference('maxItemsinSearchResults') )
1391           ? C4::Context->preference('maxItemsinSearchResults') - 1
1392           : 1;
1393
1394         # loop through every item
1395         foreach my $field (@fields) {
1396             my $item;
1397             $items_counter++;
1398
1399             # populate the items hash
1400             foreach my $code ( keys %subfieldstosearch ) {
1401                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1402             }
1403
1404       # set item's branch name, use homebranch first, fall back to holdingbranch
1405             if ( $item->{'homebranch'} ) {
1406                 $item->{'branchname'} = $branches{ $item->{homebranch} };
1407             }
1408
1409             # Last resort
1410             elsif ( $item->{'holdingbranch'} ) {
1411                 $item->{'branchname'} = $branches{ $item->{holdingbranch} };
1412             }
1413
1414 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1415             if ( $item->{onloan} ) {
1416                 $onloan_count++;
1417                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{due_date} = format_date( $item->{onloan} );
1418                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{count}++ if $item->{'homebranch'};
1419                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{branchname} = $item->{'branchname'};
1420                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{location} = $locations{ $item->{location} };
1421                 $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{itemcallnumber} = $item->{itemcallnumber};
1422         $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date}  }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1423                 # if something's checked out and lost, mark it as 'long overdue'
1424                 if ( $item->{itemlost} ) {
1425                     $onloan_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{due_date} }->{longoverdue}++;
1426                     $longoverdue_count++;
1427                 }
1428
1429                 # can place holds as long as this item isn't lost
1430                 else {
1431                     $can_place_holds = 1;
1432                 }
1433             }
1434
1435          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1436             else {
1437
1438                 # item is on order
1439                 if ( $item->{notforloan} == -1 ) {
1440                     $ordered_count++;
1441                 }
1442
1443                 # item is withdrawn, lost or damaged
1444                 if (   $item->{wthdrawn}
1445                     || $item->{itemlost}
1446                     || $item->{damaged}
1447                     || $item->{notforloan} )
1448                 {
1449                     $wthdrawn_count++    if $item->{wthdrawn};
1450                     $itemlost_count++    if $item->{itemlost};
1451                     $itemdamaged_count++ if $item->{damaged};
1452                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1453                     $other_count++;
1454
1455                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{wthdrawn} = $item->{wthdrawn};
1456                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemlost} = $item->{itemlost};
1457                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{damaged} = $item->{damaged};
1458                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{notforloan} = GetAuthorisedValueDesc( '', '', $item->{notforloan}, '', '', $notforloan_authorised_value ) if $notforloan_authorised_value;
1459                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{count}++ if $item->{'homebranch'};
1460                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{branchname} = $item->{'branchname'};
1461                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{location} = $locations{ $item->{location} };
1462                     $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{itemcallnumber} = $item->{itemcallnumber};
1463             $other_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} . $item->{status} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1464                 }
1465
1466                 # item is available
1467                 else {
1468                     $can_place_holds = 1;
1469                     $available_count++;
1470                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{count}++ if $item->{'homebranch'};
1471                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{branchname} = $item->{'branchname'};
1472                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{location} = $locations{ $item->{location} };
1473                     $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber};
1474             $available_items->{ $item->{'homebranch'} . '--' . $item->{location} . $item->{'itype'} . $item->{'itemcallnumber'} }->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl};
1475                 }
1476             }
1477         }    # notforloan, item level and biblioitem level
1478         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1479         $maxitems =
1480           ( C4::Context->preference('maxItemsinSearchResults') )
1481           ? C4::Context->preference('maxItemsinSearchResults') - 1
1482           : 1;
1483         for my $key ( sort keys %$onloan_items ) {
1484             $onloanitemscount++;
1485             push @onloan_items_loop, $onloan_items->{$key}
1486               unless $onloanitemscount > $maxitems;
1487         }
1488         for my $key ( sort keys %$other_items ) {
1489             $otheritemscount++;
1490             push @other_items_loop, $other_items->{$key}
1491               unless $otheritemscount > $maxitems;
1492         }
1493         for my $key ( sort keys %$available_items ) {
1494             $availableitemscount++;
1495             push @available_items_loop, $available_items->{$key}
1496               unless $availableitemscount > $maxitems;
1497         }
1498
1499 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1500         $can_place_holds = 0
1501           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1502         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1503         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1504         $oldbiblio->{items_count}          = $items_count;
1505         $oldbiblio->{available_items_loop} = \@available_items_loop;
1506         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1507         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1508         $oldbiblio->{availablecount}       = $available_count;
1509         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1510         $oldbiblio->{onloancount}          = $onloan_count;
1511         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1512         $oldbiblio->{othercount}           = $other_count;
1513         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1514         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1515         $oldbiblio->{itemlostcount}        = $itemlost_count;
1516         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1517         $oldbiblio->{orderedcount}         = $ordered_count;
1518         $oldbiblio->{isbn} =~
1519           s/-//g;    # deleting - in isbn to enable amazon content
1520         push( @newresults, $oldbiblio );
1521     }
1522     return @newresults;
1523 }
1524
1525 #----------------------------------------------------------------------
1526 #
1527 # Non-Zebra GetRecords#
1528 #----------------------------------------------------------------------
1529
1530 =head2 NZgetRecords
1531
1532   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1533
1534 =cut
1535
1536 sub NZgetRecords {
1537     my (
1538         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1539         $results_per_page, $offset,       $expanded_facet, $branches,
1540         $query_type,       $scan
1541     ) = @_;
1542     warn "query =$query" if $DEBUG;
1543     my $result = NZanalyse($query);
1544     warn "results =$result" if $DEBUG;
1545     return ( undef,
1546         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1547         undef );
1548 }
1549
1550 =head2 NZanalyse
1551
1552   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1553   the list is built from an inverted index in the nozebra SQL table
1554   note that title is here only for convenience : the sorting will be very fast when requested on title
1555   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1556
1557 =cut
1558
1559 sub NZanalyse {
1560     my ( $string, $server ) = @_;
1561 #     warn "---------"       if $DEBUG;
1562     warn " NZanalyse" if $DEBUG;
1563 #     warn "---------"       if $DEBUG;
1564
1565  # $server contains biblioserver or authorities, depending on what we search on.
1566  #warn "querying : $string on $server";
1567     $server = 'biblioserver' unless $server;
1568
1569 # if we have a ", replace the content to discard temporarily any and/or/not inside
1570     my $commacontent;
1571     if ( $string =~ /"/ ) {
1572         $string =~ s/"(.*?)"/__X__/;
1573         $commacontent = $1;
1574         warn "commacontent : $commacontent" if $DEBUG;
1575     }
1576
1577 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1578 # then, call again NZanalyse with $left and $right
1579 # (recursive until we find a leaf (=> something without and/or/not)
1580 # delete repeated operator... Would then go in infinite loop
1581     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1582     }
1583
1584     #process parenthesis before.
1585     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1586         my $left     = $1;
1587         my $right    = $4;
1588         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1589         warn
1590 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1591           if $DEBUG;
1592         my $leftresult = NZanalyse( $left, $server );
1593         if ($operator) {
1594             my $rightresult = NZanalyse( $right, $server );
1595
1596             # OK, we have the results for right and left part of the query
1597             # depending of operand, intersect, union or exclude both lists
1598             # to get a result list
1599             if ( $operator eq ' and ' ) {
1600                 return NZoperatorAND($leftresult,$rightresult);
1601             }
1602             elsif ( $operator eq ' or ' ) {
1603
1604                 # just merge the 2 strings
1605                 return $leftresult . $rightresult;
1606             }
1607             elsif ( $operator eq ' not ' ) {
1608                 return NZoperatorNOT($leftresult,$rightresult);
1609             }
1610         }
1611         else {
1612 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1613             return $leftresult;
1614         }
1615     }
1616     warn "string :" . $string if $DEBUG;
1617     $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/;
1618     my $left     = $1;
1619     my $right    = $3;
1620     my $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1621     warn "no parenthesis. left : $left operator: $operator right: $right"
1622       if $DEBUG;
1623
1624     # it's not a leaf, we have a and/or/not
1625     if ($operator) {
1626
1627         # reintroduce comma content if needed
1628         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1629         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1630         warn "node : $left / $operator / $right\n" if $DEBUG;
1631         my $leftresult  = NZanalyse( $left,  $server );
1632         my $rightresult = NZanalyse( $right, $server );
1633         warn " leftresult : $leftresult" if $DEBUG;
1634         warn " rightresult : $rightresult" if $DEBUG;
1635         # OK, we have the results for right and left part of the query
1636         # depending of operand, intersect, union or exclude both lists
1637         # to get a result list
1638         if ( $operator eq ' and ' ) {
1639             warn "NZAND";
1640             return NZoperatorAND($leftresult,$rightresult);
1641         }
1642         elsif ( $operator eq ' or ' ) {
1643
1644             # just merge the 2 strings
1645             return $leftresult . $rightresult;
1646         }
1647         elsif ( $operator eq ' not ' ) {
1648             return NZoperatorNOT($leftresult,$rightresult);
1649         }
1650         else {
1651
1652 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1653             die "error : operand unknown : $operator for $string";
1654         }
1655
1656         # it's a leaf, do the real SQL query and return the result
1657     }
1658     else {
1659         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1660         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1661         warn "leaf:$string" if $DEBUG;
1662
1663         # parse the string in in operator/operand/value again
1664         $string =~ /(.*)(>=|<=)(.*)/;
1665         my $left     = $1;
1666         my $operator = $2;
1667         my $right    = $3;
1668 #         warn "handling leaf... left:$left operator:$operator right:$right"
1669 #           if $DEBUG;
1670         unless ($operator) {
1671             $string =~ /(.*)(>|<|=)(.*)/;
1672             $left     = $1;
1673             $operator = $2;
1674             $right    = $3;
1675 #             warn
1676 # "handling unless (operator)... left:$left operator:$operator right:$right"
1677 #               if $DEBUG;
1678         }
1679         my $results;
1680
1681 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1682         $left =~ s/[, ].*$//;
1683
1684         # automatic replace for short operators
1685         $left = 'title'            if $left =~ '^ti$';
1686         $left = 'author'           if $left =~ '^au$';
1687         $left = 'publisher'        if $left =~ '^pb$';
1688         $left = 'subject'          if $left =~ '^su$';
1689         $left = 'koha-Auth-Number' if $left =~ '^an$';
1690         $left = 'keyword'          if $left =~ '^kw$';
1691         warn "handling leaf... left:$left operator:$operator right:$right";
1692         if ( $operator && $left ne 'keyword' ) {
1693
1694             #do a specific search
1695             my $dbh = C4::Context->dbh;
1696             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1697             my $sth =
1698               $dbh->prepare(
1699 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1700               );
1701             warn "$left / $operator / $right\n";
1702
1703             # split each word, query the DB and build the biblionumbers result
1704             #sanitizing leftpart
1705             $left =~ s/^\s+|\s+$//;
1706             foreach ( split / /, $right ) {
1707                 my $biblionumbers;
1708                 $_ =~ s/^\s+|\s+$//;
1709                 next unless $_;
1710                 warn "EXECUTE : $server, $left, $_";
1711                 $sth->execute( $server, $left, $_ )
1712                   or warn "execute failed: $!";
1713                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1714
1715 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1716 # otherwise, fill the result
1717                     $biblionumbers .= $line
1718                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1719                     warn "result : $value "
1720                       . ( $right  =~ /\d/ ) . "=="
1721                       . ( $value =~ /\D/?$line:"" );         #= $line";
1722                 }
1723
1724 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1725                 if ($results) {
1726                     warn "NZAND";
1727                     $results = NZoperatorAND($biblionumbers,$results);
1728                 }
1729                 else {
1730                     $results = $biblionumbers;
1731                 }
1732             }
1733         }
1734         else {
1735
1736       #do a complete search (all indexes), if index='kw' do complete search too.
1737             my $dbh = C4::Context->dbh;
1738             my $sth =
1739               $dbh->prepare(
1740 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1741               );
1742
1743             # split each word, query the DB and build the biblionumbers result
1744             foreach ( split / /, $string ) {
1745                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1746                 warn "search on all indexes on $_" if $DEBUG;
1747                 my $biblionumbers;
1748                 next unless $_;
1749                 $sth->execute( $server, $_ );
1750                 while ( my $line = $sth->fetchrow ) {
1751                     $biblionumbers .= $line;
1752                 }
1753
1754 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1755                 if ($results) {
1756                     $results = NZoperatorAND($biblionumbers,$results);
1757                 }
1758                 else {
1759                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1760                     $results = $biblionumbers;
1761                 }
1762             }
1763         }
1764         warn "return : $results for LEAF : $string" if $DEBUG;
1765         return $results;
1766     }
1767     warn "---------"       if $DEBUG;
1768     warn "Leave NZanalyse" if $DEBUG;
1769     warn "---------"       if $DEBUG;
1770 }
1771
1772 sub NZoperatorAND{
1773     my ($rightresult, $leftresult)=@_;
1774
1775     my @leftresult = split /;/, $leftresult;
1776     warn " @leftresult / $rightresult \n" if $DEBUG;
1777
1778     #             my @rightresult = split /;/,$leftresult;
1779     my $finalresult;
1780
1781 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1782 # the result is stored twice, to have the same weight for AND than OR.
1783 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1784 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1785     foreach (@leftresult) {
1786         my $value = $_;
1787         my $countvalue;
1788         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1789         if ( $rightresult =~ /$value-(\d+);/ ) {
1790             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1791             $finalresult .=
1792                 "$value-$countvalue;$value-$countvalue;";
1793         }
1794     }
1795     warn " $finalresult \n" if $DEBUG;
1796     return $finalresult;
1797 }
1798
1799 sub NZoperatorOR{
1800     my ($rightresult, $leftresult)=@_;
1801     return $rightresult.$leftresult;
1802 }
1803
1804 sub NZoperatorNOT{
1805     my ($rightresult, $leftresult)=@_;
1806
1807     my @leftresult = split /;/, $leftresult;
1808
1809     #             my @rightresult = split /;/,$leftresult;
1810     my $finalresult;
1811     foreach (@leftresult) {
1812         my $value=$_;
1813         $value=$1 if $value=~m/(.*)-\d+$/;
1814         unless ($rightresult =~ "$value-") {
1815             $finalresult .= "$_;";
1816         }
1817     }
1818     return $finalresult;
1819 }
1820
1821 =head2 NZorder
1822
1823   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1824
1825   TODO :: Description
1826
1827 =cut
1828
1829 sub NZorder {
1830     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1831     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1832
1833     # order title asc by default
1834     #     $ordering = '1=36 <i' unless $ordering;
1835     $results_per_page = 20 unless $results_per_page;
1836     $offset           = 0  unless $offset;
1837     my $dbh = C4::Context->dbh;
1838
1839     #
1840     # order by POPULARITY
1841     #
1842     if ( $ordering =~ /popularity/ ) {
1843         my %result;
1844         my %popularity;
1845
1846         # popularity is not in MARC record, it's builded from a specific query
1847         my $sth =
1848           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1849         foreach ( split /;/, $biblionumbers ) {
1850             my ( $biblionumber, $title ) = split /,/, $_;
1851             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1852             $sth->execute($biblionumber);
1853             my $popularity = $sth->fetchrow || 0;
1854
1855 # hint : the key is popularity.title because we can have
1856 # many results with the same popularity. In this cas, sub-ordering is done by title
1857 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1858 # (un-frequent, I agree, but we won't forget anything that way ;-)
1859             $popularity{ sprintf( "%10d", $popularity ) . $title
1860                   . $biblionumber } = $biblionumber;
1861         }
1862
1863     # sort the hash and return the same structure as GetRecords (Zebra querying)
1864         my $result_hash;
1865         my $numbers = 0;
1866         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1867             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1868                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1869                   $result{ $popularity{$key} }->as_usmarc();
1870             }
1871         }
1872         else {                                    # sort popularity ASC
1873             foreach my $key ( sort ( keys %popularity ) ) {
1874                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1875                   $result{ $popularity{$key} }->as_usmarc();
1876             }
1877         }
1878         my $finalresult = ();
1879         $result_hash->{'hits'}         = $numbers;
1880         $finalresult->{'biblioserver'} = $result_hash;
1881         return $finalresult;
1882
1883         #
1884         # ORDER BY author
1885         #
1886     }
1887     elsif ( $ordering =~ /author/ ) {
1888         my %result;
1889         foreach ( split /;/, $biblionumbers ) {
1890             my ( $biblionumber, $title ) = split /,/, $_;
1891             my $record = GetMarcBiblio($biblionumber);
1892             my $author;
1893             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1894                 $author = $record->subfield( '200', 'f' );
1895                 $author = $record->subfield( '700', 'a' ) unless $author;
1896             }
1897             else {
1898                 $author = $record->subfield( '100', 'a' );
1899             }
1900
1901 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1902 # and we don't want to get only 1 result for each of them !!!
1903             $result{ $author . $biblionumber } = $record;
1904         }
1905
1906     # sort the hash and return the same structure as GetRecords (Zebra querying)
1907         my $result_hash;
1908         my $numbers = 0;
1909         if ( $ordering eq 'author_za' ) {    # sort by author desc
1910             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1911                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1912                   $result{$key}->as_usmarc();
1913             }
1914         }
1915         else {                               # sort by author ASC
1916             foreach my $key ( sort ( keys %result ) ) {
1917                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1918                   $result{$key}->as_usmarc();
1919             }
1920         }
1921         my $finalresult = ();
1922         $result_hash->{'hits'}         = $numbers;
1923         $finalresult->{'biblioserver'} = $result_hash;
1924         return $finalresult;
1925
1926         #
1927         # ORDER BY callnumber
1928         #
1929     }
1930     elsif ( $ordering =~ /callnumber/ ) {
1931         my %result;
1932         foreach ( split /;/, $biblionumbers ) {
1933             my ( $biblionumber, $title ) = split /,/, $_;
1934             my $record = GetMarcBiblio($biblionumber);
1935             my $callnumber;
1936             my ( $callnumber_tag, $callnumber_subfield ) =
1937               GetMarcFromKohaField( $dbh, 'items.itemcallnumber' );
1938             ( $callnumber_tag, $callnumber_subfield ) =
1939               GetMarcFromKohaField('biblioitems.callnumber')
1940               unless $callnumber_tag;
1941             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1942                 $callnumber = $record->subfield( '200', 'f' );
1943             }
1944             else {
1945                 $callnumber = $record->subfield( '100', 'a' );
1946             }
1947
1948 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1949 # and we don't want to get only 1 result for each of them !!!
1950             $result{ $callnumber . $biblionumber } = $record;
1951         }
1952
1953     # sort the hash and return the same structure as GetRecords (Zebra querying)
1954         my $result_hash;
1955         my $numbers = 0;
1956         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1957             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1958                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1959                   $result{$key}->as_usmarc();
1960             }
1961         }
1962         else {                                     # sort by title ASC
1963             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1964                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1965                   $result{$key}->as_usmarc();
1966             }
1967         }
1968         my $finalresult = ();
1969         $result_hash->{'hits'}         = $numbers;
1970         $finalresult->{'biblioserver'} = $result_hash;
1971         return $finalresult;
1972     }
1973     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1974         my %result;
1975         foreach ( split /;/, $biblionumbers ) {
1976             my ( $biblionumber, $title ) = split /,/, $_;
1977             my $record = GetMarcBiblio($biblionumber);
1978             my ( $publicationyear_tag, $publicationyear_subfield ) =
1979               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1980             my $publicationyear =
1981               $record->subfield( $publicationyear_tag,
1982                 $publicationyear_subfield );
1983
1984 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1985 # and we don't want to get only 1 result for each of them !!!
1986             $result{ $publicationyear . $biblionumber } = $record;
1987         }
1988
1989     # sort the hash and return the same structure as GetRecords (Zebra querying)
1990         my $result_hash;
1991         my $numbers = 0;
1992         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
1993             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1994                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1995                   $result{$key}->as_usmarc();
1996             }
1997         }
1998         else {                                 # sort by pub year ASC
1999             foreach my $key ( sort ( keys %result ) ) {
2000                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2001                   $result{$key}->as_usmarc();
2002             }
2003         }
2004         my $finalresult = ();
2005         $result_hash->{'hits'}         = $numbers;
2006         $finalresult->{'biblioserver'} = $result_hash;
2007         return $finalresult;
2008
2009         #
2010         # ORDER BY title
2011         #
2012     }
2013     elsif ( $ordering =~ /title/ ) {
2014
2015 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2016         my %result;
2017         foreach ( split /;/, $biblionumbers ) {
2018             my ( $biblionumber, $title ) = split /,/, $_;
2019
2020 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2021 # and we don't want to get only 1 result for each of them !!!
2022 # hint & speed improvement : we can order without reading the record
2023 # so order, and read records only for the requested page !
2024             $result{ $title . $biblionumber } = $biblionumber;
2025         }
2026
2027     # sort the hash and return the same structure as GetRecords (Zebra querying)
2028         my $result_hash;
2029         my $numbers = 0;
2030         if ( $ordering eq 'title_az' ) {    # sort by title desc
2031             foreach my $key ( sort ( keys %result ) ) {
2032                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2033             }
2034         }
2035         else {                              # sort by title ASC
2036             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2037                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2038             }
2039         }
2040
2041         # limit the $results_per_page to result size if it's more
2042         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2043
2044         # for the requested page, replace biblionumber by the complete record
2045         # speed improvement : avoid reading too much things
2046         for (
2047             my $counter = $offset ;
2048             $counter <= $offset + $results_per_page ;
2049             $counter++
2050           )
2051         {
2052             $result_hash->{'RECORDS'}[$counter] =
2053               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2054         }
2055         my $finalresult = ();
2056         $result_hash->{'hits'}         = $numbers;
2057         $finalresult->{'biblioserver'} = $result_hash;
2058         return $finalresult;
2059     }
2060     else {
2061
2062 #
2063 # order by ranking
2064 #
2065 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2066         my %result;
2067         my %count_ranking;
2068         foreach ( split /;/, $biblionumbers ) {
2069             my ( $biblionumber, $title ) = split /,/, $_;
2070             $title =~ /(.*)-(\d)/;
2071
2072             # get weight
2073             my $ranking = $2;
2074
2075 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2076 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2077 # biblio N has ranking = 6
2078             $count_ranking{$biblionumber} += $ranking;
2079         }
2080
2081 # build the result by "inverting" the count_ranking hash
2082 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2083 #         warn "counting";
2084         foreach ( keys %count_ranking ) {
2085             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2086         }
2087
2088     # sort the hash and return the same structure as GetRecords (Zebra querying)
2089         my $result_hash;
2090         my $numbers = 0;
2091         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2092             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2093         }
2094
2095         # limit the $results_per_page to result size if it's more
2096         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2097
2098         # for the requested page, replace biblionumber by the complete record
2099         # speed improvement : avoid reading too much things
2100         for (
2101             my $counter = $offset ;
2102             $counter <= $offset + $results_per_page ;
2103             $counter++
2104           )
2105         {
2106             $result_hash->{'RECORDS'}[$counter] =
2107               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2108               if $result_hash->{'RECORDS'}[$counter];
2109         }
2110         my $finalresult = ();
2111         $result_hash->{'hits'}         = $numbers;
2112         $finalresult->{'biblioserver'} = $result_hash;
2113         return $finalresult;
2114     }
2115 }
2116
2117 =head2 ModBiblios
2118
2119 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2120
2121 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2122 test parameter if set donot perform change to records in database.
2123
2124 =over 2
2125
2126 =item C<input arg:>
2127
2128     * $listbiblios is an array ref to marcrecords to be changed
2129     * $tagsubfield is the reference of the subfield to change.
2130     * $initvalue is the value to search the record for
2131     * $targetvalue is the value to set the subfield to
2132     * $test is to be set only not to perform changes in database.
2133
2134 =item C<Output arg:>
2135     * $countchanged counts all the changes performed.
2136     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2137
2138 =item C<usage in the script:>
2139
2140 =back
2141
2142 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2143 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2144 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2145
2146 =cut
2147
2148 sub ModBiblios {
2149     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2150     my $countmatched;
2151     my @unmatched;
2152     my ( $tag, $subfield ) = ( $1, $2 )
2153       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2154     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2155         $tag = $tag . $subfield;
2156         undef $subfield;
2157     }
2158     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber');
2159     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber');
2160     if ($tag eq $itemtag) {
2161         # do not allow the embedded item tag to be
2162         # edited from here
2163         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2164         return (0, []);
2165     }
2166     foreach my $usmarc (@$listbiblios) {
2167         my $record;
2168         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2169         my $biblionumber;
2170         if ($@) {
2171
2172             # usmarc is not a valid usmarc May be a biblionumber
2173             # FIXME - sorry, please let's figure out whether
2174             #         this function is to be passed a list of
2175             #         record numbers or a list of MARC::Record
2176             #         objects.  The former is probably better
2177             #         because the MARC records supplied by Zebra
2178             #         may be not current.
2179             $record       = GetMarcBiblio($usmarc);
2180             $biblionumber = $usmarc;
2181         }
2182         else {
2183             if ( $bntag >= 010 ) {
2184                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2185             }
2186             else {
2187                 $biblionumber = $record->field($bntag)->data;
2188             }
2189         }
2190
2191         #GetBiblionumber is to be written.
2192         #Could be replaced by TransformMarcToKoha (But Would be longer)
2193         if ( $record->field($tag) ) {
2194             my $modify = 0;
2195             foreach my $field ( $record->field($tag) ) {
2196                 if ($subfield) {
2197                     if (
2198                         $field->delete_subfield(
2199                             'code'  => $subfield,
2200                             'match' => qr($initvalue)
2201                         )
2202                       )
2203                     {
2204                         $countmatched++;
2205                         $modify = 1;
2206                         $field->update( $subfield, $targetvalue )
2207                           if ($targetvalue);
2208                     }
2209                 }
2210                 else {
2211                     if ( $tag >= 010 ) {
2212                         if ( $field->delete_field($field) ) {
2213                             $countmatched++;
2214                             $modify = 1;
2215                         }
2216                     }
2217                     else {
2218                         $field->data = $targetvalue
2219                           if ( $field->data =~ qr($initvalue) );
2220                     }
2221                 }
2222             }
2223
2224             #       warn $record->as_formatted;
2225             if ($modify) {
2226                 ModBiblio( $record, $biblionumber,
2227                     GetFrameworkCode($biblionumber) )
2228                   unless ($test);
2229             }
2230             else {
2231                 push @unmatched, $biblionumber;
2232             }
2233         }
2234         else {
2235             push @unmatched, $biblionumber;
2236         }
2237     }
2238     return ( $countmatched, \@unmatched );
2239 }
2240
2241 END { }    # module clean-up code here (global destructor)
2242
2243 1;
2244 __END__
2245
2246 =head1 AUTHOR
2247
2248 Koha Developement team <info@koha.org>
2249
2250 =cut