Nick Clemens
7bbf4f7b5f
Previously we put all the servers into an object with keys of the server id.
This patch converts it to an array of objects to preserve order, and adjusts
the code to use the array index where necessary and store the server id
within the array.

To test:
1 - Add some new Z39.50 servers (they don't need to be valid): FIRST, SECOND, THIRD, FOURTH
2 - Adjust the ranking so FOURTH:1, THIRD:2, SECOND:3, FIRST:4
3 - Enable and launch the advanced editor
4 - Click 'Advanced' under search on the left
5 - Note the list displays in the order you entered the servers
6 - Apply patch
7 - Reload
8 - Order is correct
9 - With valid servers, confirm that searching still works and that servers can be checked or unchecked to include/remove them from results

https://bugs.koha-community.org/show_bug.cgi?id=17515

Signed-off-by: B Johnson <barbara.johnson@bedfordtx.gov>
Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>
Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
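The data-structure change the message describes can be sketched in Perl for
illustration (the patch itself touches the editor's server list; the names
and ids below are hypothetical, not taken from the patch): a hash keyed by
server id loses insertion order, while an array of hashrefs keeps it, with
the id stored inside each element.

    #!/usr/bin/perl
    use Modern::Perl;

    # Before: keyed by server id -- Perl hash iteration order is arbitrary,
    # so the displayed order did not match the configured rank.
    my %servers_by_id = (
        1 => { name => 'FIRST' },
        4 => { name => 'FOURTH' },
    );

    # After: an array of hashrefs preserves order; the server id moves into
    # each element, and the array index is used where a key was used before.
    my @servers = (
        { id => 4, name => 'FOURTH' },    # rank 1
        { id => 3, name => 'THIRD' },     # rank 2
        { id => 2, name => 'SECOND' },    # rank 3
        { id => 1, name => 'FIRST' },     # rank 4
    );

    for my $i ( 0 .. $#servers ) {
        say "$i: $servers[$i]{name} (id $servers[$i]{id})";
    }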
351 lines
10 KiB
Perl
package Koha::MetaSearcher;

# Copyright 2014 ByWater Solutions
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.

use Modern::Perl;

use base 'Class::Accessor';

use C4::Charset qw( MarcToUTF8Record SetUTF8Flag );
use C4::Context; # For preference() and Zconn(), used below
use C4::Search qw(); # Purely for new_record_from_zebra
use DBIx::Class::ResultClass::HashRefInflator;
use IO::Select;
use Koha::Caches;
use Koha::Database;
use Koha::MetadataRecord;
use MARC::File::XML;
use Storable qw( store_fd fd_retrieve );
use Time::HiRes qw( clock_gettime CLOCK_MONOTONIC );
use UUID;
use ZOOM;

use sort 'stable';

__PACKAGE__->mk_accessors( qw( fetch offset on_error resultset ) );

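# Constructor. Takes an optional hashref of options; anything not supplied
# falls back to the defaults below (offset 0, fetch 100 records, a no-op
# error handler, an empty result list and a fresh UUID string naming the
# resultset).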
sub new {
    my ( $class, $options ) = @_;

    my ( $uuid, $uuidstring );
    UUID::generate($uuid);
    UUID::unparse( $uuid, $uuidstring );

    return bless {
        offset => 0,
        fetch => 100,
        on_error => sub {},
        results => [],
        resultset => $uuidstring,
        %{ $options || {} }
    }, $class;
}

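# Record a single hit: wrap the MARC record in a Koha::MetadataRecord,
# extract the display fields mapped in %fetch, and push the hit onto the
# in-memory result list.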
sub handle_hit {
    my ( $self, $index, $server, $marcrecord ) = @_;

    my $record = Koha::MetadataRecord->new( { schema => 'marc', record => $marcrecord } );

    my %fetch = (
        title => 'biblio.title',
        subtitle => 'biblio.subtitle',
        seriestitle => 'biblio.seriestitle',
        author => 'biblio.author',
        isbn => 'biblioitems.isbn',
        issn => 'biblioitems.issn',
        lccn => 'biblioitems.lccn', # LC control number (not call number)
        edition => 'biblioitems.editionstatement',
        date => 'biblio.copyrightdate', # MARC21
        date2 => 'biblioitems.publicationyear', # UNIMARC
    );

    my $metadata = {};
    while ( my ( $key, $kohafield ) = each %fetch ) {
        $metadata->{$key} = $record->getKohaField($kohafield);
    }
    $metadata->{date} //= $metadata->{date2};

    push @{ $self->{results} }, {
        server => $server,
        index => $index,
        record => $marcrecord,
        metadata => $metadata,
    };
}

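# Search all requested servers in parallel, one forked worker per server.
# Numeric ids are looked up as Z39.50 servers in the database; other ids
# take the form "type" or "type:extra" (e.g. a Koha Zebra connection or an
# import batch). Previously cached result sets are reused instead of
# re-querying. Returns a hashref of hit/fetch statistics.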
sub search {
    my ( $self, $server_ids, $query ) = @_;

    my $resultset_expiry = 300;

    my $cache = Koha::Caches->get_instance();
    my $schema = Koha::Database->new->schema;
    my $stats = {
        num_fetched => {
            map { $_ => 0 } @$server_ids
        },
        num_hits => {
            map { $_ => 0 } @$server_ids
        },
        total_fetched => 0,
        total_hits => 0,
    };
    my $start = clock_gettime( CLOCK_MONOTONIC );
    my $select = IO::Select->new;

    my @cached_sets;
    my @servers;

    foreach my $server_id ( @$server_ids ) {
        if ( $server_id =~ /^\d+$/ ) {
            # Z39.50 server
            my $server = $schema->resultset('Z3950server')->find(
                { id => $server_id },
                { result_class => 'DBIx::Class::ResultClass::HashRefInflator' },
            );
            $server->{type} = 'z3950';

            push @servers, $server;
        } elsif ( $server_id =~ /(\w+)(?::(\w+))?/ ) {
            # Special server
            push @servers, {
                type => $1,
                extra => $2,
                id => $server_id,
                host => $server_id,
                servername => $server_id,
            };
        }
    }

    # HashRefInflator is used so that the information will survive into the fork
    foreach my $server ( @servers ) {
        if ( $cache ) {
            my $set = $cache->get_from_cache( 'z3950-resultset-' . $self->resultset . '-' . $server->{id} );
            if ( ref($set) eq 'HASH' ) {
                $set->{server} = $server;
                push @cached_sets, $set;
                next;
            }
        }

        $select->add( $self->_start_worker( $server, $query ) );
    }

    # Handle these while the servers are searching
    foreach my $set ( @cached_sets ) {
        $self->_handle_hits( $stats, $set );
    }

    while ( $select->count ) {
        foreach my $readfh ( $select->can_read() ) {
            my $result = fd_retrieve( $readfh );

            $select->remove( $readfh );
            close $readfh;
            wait;

            next if ( ref $result ne 'HASH' );

            if ( $result->{error} ) {
                $self->{on_error}->( $result->{server}, $result->{error} );
                next;
            }

            $self->_handle_hits( $stats, $result );

            if ( $cache ) {
                $cache->set_in_cache( 'z3950-resultset-' . $self->resultset . '-' . $result->{server}->{id}, {
                    hits => $result->{hits},
                    num_fetched => $result->{num_fetched},
                    num_hits => $result->{num_hits},
                }, { expiry => $resultset_expiry } );
            }
        }
    }

    $stats->{time} = clock_gettime( CLOCK_MONOTONIC ) - $start;

    return $stats;
}

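# Fork a worker to query a single server, writing the outcome back to the
# parent through a pipe via Storable. Returns the parent's read end of the
# pipe, or nothing if the fork failed.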
sub _start_worker {
    my ( $self, $server, $query ) = @_;
    pipe my $readfh, my $writefh;

    # Accessing the cache or Koha database after the fork is risky, so get any resources we need
    # here.
    my $pid;
    my $marcflavour = C4::Context->preference('marcflavour');

    if ( ( $pid = fork ) ) {
        # Parent process
        close $writefh;

        return $readfh;
    } elsif ( !defined $pid ) {
        # Error

        $self->{on_error}->( $server, 'Failed to fork' );
        return;
    }

    close $readfh;
    my $connection;
    my ( $num_hits, $num_fetched, $hits, $results );

    eval {
        if ( $server->{type} eq 'z3950' ) {
            my $zoptions = ZOOM::Options->new();
            $zoptions->option( 'elementSetName', 'F' );
            $zoptions->option( 'databaseName', $server->{db} );
            $zoptions->option( 'user', $server->{userid} ) if $server->{userid};
            $zoptions->option( 'password', $server->{password} ) if $server->{password};
            $zoptions->option( 'preferredRecordSyntax', $server->{syntax} );
            $zoptions->option( 'timeout', $server->{timeout} ) if $server->{timeout};

            $connection = ZOOM::Connection->create($zoptions);

            $connection->connect( $server->{host}, $server->{port} );
            $results = $connection->search_pqf( $query ); # Starts the search
        } elsif ( $server->{type} eq 'koha' ) {
            $connection = C4::Context->Zconn( $server->{extra} );
            $results = $connection->search_pqf( $query ); # Starts the search
        } elsif ( $server->{type} eq 'batch' ) {
            $server->{encoding} = 'utf-8';
        }
    };
    if ($@) {
        store_fd {
            error => $connection ? $connection->exception() : $@,
            server => $server,
        }, $writefh;
        exit;
    }

    if ( $server->{type} eq 'batch' ) {
        # TODO: actually handle PQF
        $query =~ s/@\w+ (?:\d+=\d+ )?//g;
        $query =~ s/"//g;

        my $schema = Koha::Database->new->schema;
        $schema->storage->debug(1);
        my $match_condition = [ map +{ -like => '%' . $_ . '%' }, split( /\s+/, $query ) ];
        $hits = [ $schema->resultset('ImportRecord')->search(
            {
                import_batch_id => $server->{extra},
                -or => [
                    { 'import_biblios.title' => $match_condition },
                    { 'import_biblios.author' => $match_condition },
                    { 'import_biblios.isbn' => $match_condition },
                    { 'import_biblios.issn' => $match_condition },
                ],
            },
            {
                join => [ qw( import_biblios ) ],
                rows => $self->{fetch},
            }
        )->get_column( 'marc' )->all ];

        $num_hits = $num_fetched = scalar @$hits;
    } else {
        $num_hits = $results->size;
        $num_fetched = ( $self->{offset} + $self->{fetch} ) < $num_hits ? $self->{fetch} : $num_hits;

        $hits = [ map { $_->raw() } @{ $results->records( $self->{offset}, $num_fetched, 1 ) } ];
    }

    if ( !@$hits && $connection && $connection->exception() ) {
        store_fd {
            error => $connection->exception(),
            server => $server,
        }, $writefh;
        exit;
    }

    if ( $server->{type} eq 'koha' ) {
        $hits = [ map { C4::Search::new_record_from_zebra( $server->{extra}, $_ ) } @$hits ];
    } else {
        $hits = [ map { $self->_import_record( $_, $marcflavour, $server->{encoding} ? $server->{encoding} : "iso-5426" ) } @$hits ];
    }

    store_fd {
        hits => $hits,
        num_fetched => $num_fetched,
        num_hits => $num_hits,
        server => $server,
    }, $writefh;

    exit;
}

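# Convert a raw record fetched from a remote server into a UTF-8 flagged
# MARC::Record, honouring the server's declared encoding.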
sub _import_record {
    my ( $self, $raw, $marcflavour, $encoding ) = @_;

    my ( $marcrecord ) = MarcToUTF8Record( $raw, $marcflavour, $encoding ); # Ignores charset return values

    SetUTF8Flag($marcrecord);
    return $marcrecord;
}

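# Fold one result set (fresh or cached) into the running statistics and
# pass each hit to handle_hit() with its overall index.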
sub _handle_hits {
    my ( $self, $stats, $set ) = @_;

    my $server = $set->{server};

    my $num_hits = $stats->{num_hits}->{ $server->{id} } = $set->{num_hits};
    my $num_fetched = $stats->{num_fetched}->{ $server->{id} } = $set->{num_fetched};

    $stats->{total_hits} += $num_hits;
    $stats->{total_fetched} += $num_fetched;

    foreach my $j ( 0..$#{ $set->{hits} } ) {
        $self->handle_hit( $self->{offset} + $j, $server, $set->{hits}->[$j] );
    }
}

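# Stable sort of the collected results on a metadata key; $direction is
# expected to be 1 (ascending) or -1 (descending). Records with an empty
# sort key always sort to the end, whatever the direction.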
sub sort {
    my ( $self, $key, $direction ) = @_;

    my $empty_flip = -1; # Determines the flip of ordering for records with empty sort keys.

    foreach my $hit ( @{ $self->{results} } ) {
        ( $hit->{sort_key} = $hit->{metadata}->{$key} || '' ) =~ s/\W//g;
    }

    $self->{results} = [ sort {
        # Sort empty records at the end
        return -$empty_flip unless $a->{sort_key};
        return $empty_flip unless $b->{sort_key};

        $direction * ( $a->{sort_key} cmp $b->{sort_key} );
    } @{ $self->{results} } ];
}

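# Return up to $length results starting at $offset, skipping empty slots.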
sub results {
    my ( $self, $offset, $length ) = @_;

    my @subset;

    foreach my $i ( $offset..( $offset + $length - 1 ) ) {
        push @subset, $self->{results}->[$i] if $self->{results}->[$i];
    }

    return @subset;
}

1;