Browse Source

Bug 13937: Add support for SRU and Elasticsearch

Splits Session to GenericSession and ZebraSession where Generic supports any
search backend via the SearchEngine classes and Zebra maintains the direct
channel to the Zebra server.

Adds config files required for mapping BIB-1 attributes to Koha search fields
and SRU indexes to BIB-1 attributes.

Adds PODs.

Sponsored-by: National Library of Finland
Signed-off-by: Stefan Berndtsson <stefan.berndtsson@ub.gu.se>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
remotes/origin/19.11.x
Ere Maijala 6 years ago
committed by Martin Renvoize
parent
commit
ac28d8043d
Signed by: martin.renvoize GPG Key ID: 422B469130441A0F
  1. 110
      Koha/Z3950Responder.pm
  2. 113
      Koha/Z3950Responder/GenericSession.pm
  3. 117
      Koha/Z3950Responder/RPN.pm
  4. 344
      Koha/Z3950Responder/Session.pm
  5. 163
      Koha/Z3950Responder/ZebraSession.pm
  6. 46
      etc/z3950/attribute_mappings.yaml
  7. 12
      etc/z3950/config.xml
  8. 162
      etc/z3950/pqf.properties
  9. 28
      misc/z3950_responder.pl

110
Koha/Z3950Responder.pm

@ -24,11 +24,44 @@ use Modern::Perl;
use C4::Biblio qw( GetMarcFromKohaField );
use C4::Koha qw( GetAuthorisedValues );
use Koha;
use Koha::Z3950Responder::Session;
use Net::Z3950::SimpleServer;
=head1 NAME
Koha::Z3950Responder - Main class for interfacing with Net::Z3950::SimpleServer
=head1 SYNOPSIS
use Koha::Z3950Responder;
my $z = Koha::Z3950Responder->new( {
add_item_status_subfield => 1,
add_status_multi_subfield => 1,
debug => 0,
num_to_prefetch => 20,
config_dir => '/home/koha/etc',
yaz_options => [ ],
} );
$z->start();
=head1 DESCRIPTION
A daemon class that interfaces with Net::Z3950::SimpleServer to provider Z39.50/SRU
service. Uses a Session class for the actual functionality.
=head1 METHODS
=head2 INSTANCE METHODS
=head3 new
$self->new({
add_item_status_subfield => 1
});
=cut
sub new {
my ( $class, $config ) = @_;
@ -66,10 +99,15 @@ sub new {
unshift @{ $self->{yaz_options} }, '-v', 'none,fatal';
}
# Set main config for SRU support
unshift @{ $self->{yaz_options} }, '-f', $self->{config_dir} . 'config.xml' if $self->{config_dir};
# Set num to prefetch if not passed
$self->{num_to_prefetch} //= 20;
$self->{server} = Net::Z3950::SimpleServer->new(
INIT => sub { $self->init_handler(@_) },
SEARCH => sub { $self->search_handler(@_) },
PRESENT => sub { $self->present_handler(@_) },
FETCH => sub { $self->fetch_handler(@_) },
CLOSE => sub { $self->close_handler(@_) },
);
@ -77,25 +115,52 @@ sub new {
return bless( $self, $class );
}
=head3 start
$z->start();
Start the daemon and begin serving requests. Does not return unless initialization fails or a
fatal error occurs.
=cut
sub start {
my ( $self ) = @_;
$self->{server}->launch_server( 'Koha::Z3950Responder', @{ $self->{yaz_options} } )
}
# The rest of these methods are SimpleServer callbacks bound to this Z3950Responder object. It's
# worth noting that these callbacks don't return anything; they both receive and return data in the
# $args hashref.
=head2 CALLBACKS
These methods are SimpleServer callbacks bound to this Z3950Responder object.
It's worth noting that these callbacks don't return anything; they both
receive and return data in the $args hashref.
=head3 init_handler
Callback that is called when a new connection is initialized
=cut
sub init_handler {
# Called when the client first connects.
my ( $self, $args ) = @_;
# This holds all of the per-connection state.
my $session = Koha::Z3950Responder::Session->new({
server => $self,
peer => $args->{PEER_NAME},
});
my $session;
if (C4::Context->preference('SearchEngine') eq 'Zebra') {
use Koha::Z3950Responder::ZebraSession;
$session = Koha::Z3950Responder::ZebraSession->new({
server => $self,
peer => $args->{PEER_NAME},
});
} else {
use Koha::Z3950Responder::GenericSession;
$session = Koha::Z3950Responder::GenericSession->new({
server => $self,
peer => $args->{PEER_NAME}
});
}
$args->{HANDLE} = $session;
@ -103,27 +168,36 @@ sub init_handler {
$args->{IMP_VER} = Koha::version;
}
=head3 search_handler
Callback that is called when a new search is performed
=cut
sub search_handler {
# Called when search is first sent.
my ( $self, $args ) = @_;
$args->{HANDLE}->search_handler($args);
}
sub present_handler {
# Called when a set of records is requested.
my ( $self, $args ) = @_;
=head3 fetch_handler
$args->{HANDLE}->present_handler($args);
}
Callback that is called when records are requested
=cut
sub fetch_handler {
# Called when a given record is requested.
my ( $self, $args ) = @_;
$args->{HANDLE}->fetch_handler( $args );
}
=head3 close_handler
Callback that is called when a session is terminated
=cut
sub close_handler {
my ( $self, $args ) = @_;

113
Koha/Z3950Responder/GenericSession.pm

@ -0,0 +1,113 @@
#!/usr/bin/perl
package Koha::Z3950Responder::GenericSession;
# Copyright The National Library of Finland 2018
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use Modern::Perl;
use base qw( Koha::Z3950Responder::Session );
use Koha::Logger;
use Koha::SearchEngine::Search;
use Koha::SearchEngine::QueryBuilder;
use Koha::Z3950Responder::RPN;
=head1 NAME
Koha::Z3950Responder::genericSession
=head1 SYNOPSIS
Backend-agnostic session class that uses C<Koha::Session> as the base class. Utilizes
C<Koha::SearchEngine> for the actual functionality.
=head2 INSTANCE METHODS
=head3 start_search
my ($resultset, $hits) = $self->start_search( $args, $self->{server}->{num_to_prefetch} );
Perform a search using C<Koha::SearchEngine>'s QueryBuilder and Search.
=cut
sub start_search {
my ( $self, $args, $num_to_prefetch ) = @_;
if (!defined $self->{'attribute_mappings'}) {
require YAML;
$self->{'attribute_mappings'} = YAML::LoadFile($self->{server}->{config_dir} . 'attribute_mappings.yaml');
}
my $database = $args->{DATABASES}->[0];
my $builder = Koha::SearchEngine::QueryBuilder->new({ index => $database });
my $searcher = Koha::SearchEngine::Search->new({ index => $database });
my $built_query;
my $query = $args->{RPN}->{'query'}->to_koha($self->{'attribute_mappings'}->{$database});
$self->log_debug(" parsed search: $query");
my @operands = $query;
(undef, $built_query) = $builder->build_query_compat( undef, \@operands, undef, undef, undef, 0);
my ($error, $marcresults, $hits ) = $searcher->simple_search_compat($built_query, 0, $num_to_prefetch);
if (defined $error) {
$self->set_error($args, $self->ERR_SEARCH_FAILED, 'Search failed');
return;
}
my $resultset = {
query => $built_query,
database => $database,
cached_offset => 0,
cached_results => $marcresults,
hits => $hits
};
return ($resultset, $hits);
}
=head3 fetch_record
my $record = $self->fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} );
Fetch a record from SearchEngine. Caches records in session to avoid too many fetches.
=cut
sub fetch_record {
my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_;
# Fetch more records if necessary
my $offset = $args->{OFFSET} - 1;
if ($offset < $resultset->{cached_offset} || $offset >= $resultset->{cached_offset} + $num_to_prefetch) {
$self->log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $offset");
my $searcher = Koha::SearchEngine::Search->new({ index => $resultset->{'database'} });
my ($error, $marcresults, $num_hits ) = $searcher->simple_search_compat($resultset->{'query'}, $offset, $num_to_prefetch);
if (defined $error) {
$self->set_error($args, $self->ERR_TEMPORARY_ERROR, 'Fetch failed');
return;
}
$resultset->{cached_offset} = $offset;
$resultset->{cached_results} = $marcresults;
}
return $resultset->{cached_results}[$offset - $resultset->{cached_offset}];
}
1;

117
Koha/Z3950Responder/RPN.pm

@ -0,0 +1,117 @@
#!/usr/bin/perl
# Copyright The National Library of Finland 2018
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under thes
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use Modern::Perl;
=head1 NAME
Koha::Z3950Responder::RPN
=head1 SYNOPSIS
Overrides for the C<Net::Z3950::RPN> classes adding a C<to_koha> method that
converts the query to a syntax that C<Koha::SearchEngine> understands.
=head1 DESCRIPTION
The method used here is described in C<samples/render-search.pl> of
C<Net::Z3950::SimpleServer>.
=cut
package Net::Z3950::RPN::Term;
sub to_koha {
my ($self, $mappings) = @_;
my $attrs = $self->{'attributes'};
my $fields = $mappings->{use}{default} // '_all';
my $split = 0;
my $quote = '';
my $prefix = '';
my $suffix = '';
my $term = $self->{'term'};
if ($attrs) {
foreach my $attr (@$attrs) {
if ($attr->{'attributeType'} == 1) { # use
my $use = $attr->{'attributeValue'};
$fields = $mappings->{use}{$use} if defined $mappings->{use}{$use};
} elsif ($attr->{'attributeType'} == 4) { # structure
$split = 1 if ($attr->{'attributeValue'} == 2);
$quote = '"' if ($attr->{'attributeValue'} == 1);
} elsif ($attr->{'attributeType'} == 5) { # truncation
my $truncation = $attr->{'attributeValue'};
$prefix = '*' if ($truncation == 2 || $truncation == 3);
$suffix = '*' if ($truncation == 1 || $truncation == 3);
}
}
}
$fields = [$fields] unless ref($fields) eq 'ARRAY';
if ($split) {
my @terms;
foreach my $word (split(/\s/, $term)) {
$word =~ s/^[\,\.;:\\\/\"\'\-\=]+//g;
$word =~ s/[\,\.;:\\\/\"\'\-\=]+$//g;
next if (!$word);
my @words;
foreach my $field (@{$fields}) {
push(@words, "$field:($prefix$word$suffix)");
}
push (@terms, join(' OR ', @words));
}
return '(' . join(' AND ', @terms) . ')';
}
my @terms;
foreach my $field (@{$fields}) {
push(@terms, "$field:($prefix$term$suffix)");
}
return '(' . join(' OR ', @terms) . ')';
}
package Net::Z3950::RPN::And;
sub to_koha
{
my ($self, $mappings) = @_;
return '(' . $self->[0]->to_koha($mappings) . ' AND ' .
$self->[1]->to_koha($mappings) . ')';
}
package Net::Z3950::RPN::Or;
sub to_koha
{
my ($self, $mappings) = @_;
return '(' . $self->[0]->to_koha($mappings) . ' OR ' .
$self->[1]->to_koha($mappings) . ')';
}
package Net::Z3950::RPN::AndNot;
sub to_koha
{
my ($self, $mappings) = @_;
return '(' . $self->[0]->to_koha($mappings) . ' NOT ' .
$self->[1]->to_koha($mappings) . ')';
}
1;

344
Koha/Z3950Responder/Session.pm

@ -28,7 +28,28 @@ use C4::Reserves qw( GetReserveStatus );
use C4::Search qw();
use Koha::Logger;
use ZOOM;
=head1 NAME
Koha::Z3950Responder::Session
=head1 SYNOPSIS
An abstract class where backend-specific session modules are derived from.
Z3950Responder creates one of the child classes depending on the SearchEngine
preference.
=head1 DESCRIPTION
This class contains common functions for handling searching for and fetching
of records. It can optionally add item status information to the returned
records. The backend-specific abstract methods need to be implemented in a
child class.
=head2 CONSTANTS
OIDs and diagnostic codes used in Z39.50
=cut
use constant {
UNIMARC_OID => '1.2.840.10003.5.1',
@ -41,10 +62,26 @@ use constant {
ERR_PRESENT_OUT_OF_RANGE => 13,
ERR_RECORD_TOO_LARGE => 16,
ERR_NO_SUCH_RESULTSET => 30,
ERR_SYNTAX_UNSUPPORTED => 230,
ERR_SEARCH_FAILED => 125,
ERR_SYNTAX_UNSUPPORTED => 239,
ERR_DB_DOES_NOT_EXIST => 235,
};
=head1 FUNCTIONS
=head2 INSTANCE METHODS
=head3 new
my $session = $self->new({
server => $z3950responder,
peer => 'PEER NAME'
});
Instantiate a Session
=cut
sub new {
my ( $class, $args ) = @_;
@ -58,189 +95,70 @@ sub new {
$self->{logger}->debug_to_screen();
}
$self->_log_info("connected");
$self->log_info('connected');
return $self;
}
sub _log_debug {
my ( $self, $msg ) = @_;
$self->{logger}->debug("[$self->{peer}] $msg");
}
sub _log_info {
my ( $self, $msg ) = @_;
$self->{logger}->info("[$self->{peer}] $msg");
}
sub _log_error {
my ( $self, $msg ) = @_;
$self->{logger}->error("[$self->{peer}] $msg");
}
sub _set_error {
my ( $self, $args, $code, $msg ) = @_;
( $args->{ERR_CODE}, $args->{ERR_STR} ) = ( $code, $msg );
$self->_log_error(" returning error $code: $msg");
}
sub _set_error_from_zoom {
my ( $self, $args, $exception ) = @_;
$self->_set_error( $args, ERR_TEMPORARY_ERROR, 'Cannot connect to upstream server' );
$self->_log_error(
"Zebra upstream error: " .
$exception->message() . " (" .
$exception->code() . ") " .
( $exception->addinfo() // '' ) . " " .
$exception->diagset()
);
}
# This code originally went through C4::Search::getRecords, but had to use so many escape hatches
# that it was easier to directly connect to Zebra.
sub _start_search {
my ( $self, $args, $in_retry ) = @_;
my $database = $args->{DATABASES}->[0];
my ( $connection, $results );
=head3 search_handler
eval {
$connection = C4::Context->Zconn(
# We're depending on the caller to have done some validation.
$database eq 'biblios' ? 'biblioserver' : 'authorityserver',
0 # No, no async, doesn't really help much for single-server searching
);
Callback that is called when a new search is performed
$results = $connection->search_pqf( $args->{QUERY} );
Calls C<start_search> for backend-specific retrieval logic
$self->_log_debug(' retry successful') if ($in_retry);
};
if ($@) {
die $@ if ( ref($@) ne 'ZOOM::Exception' );
if ( $@->diagset() eq 'ZOOM' && $@->code() == 10004 && !$in_retry ) {
$self->_log_debug(' upstream server lost connection, retrying');
return $self->_start_search( $args, 1 );
}
$self->_set_error_from_zoom( $args, $@ );
$connection = undef;
}
return ( $connection, $results, $results ? $results->size() : -1 );
}
sub _check_fetch {
my ( $self, $resultset, $args, $offset, $num_records ) = @_;
if ( !defined( $resultset ) ) {
$self->_set_error( $args, ERR_NO_SUCH_RESULTSET, 'No such resultset' );
return 0;
}
if ( $offset < 0 || $offset + $num_records > $resultset->{hits} ) {
$self->_set_error( $args, ERR_PRESENT_OUT_OF_RANGE, 'Present request out of range' );
return 0;
}
return 1;
}
sub _fetch_record {
my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_;
my $record;
eval {
if ( !$resultset->{results}->record_immediate( $index ) ) {
my $start = $num_to_prefetch ? int( $index / $num_to_prefetch ) * $num_to_prefetch : $index;
if ( $start + $num_to_prefetch >= $resultset->{results}->size() ) {
$num_to_prefetch = $resultset->{results}->size() - $start;
}
$self->_log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $start");
$resultset->{results}->records( $start, $num_to_prefetch, 0 );
}
$record = $resultset->{results}->record_immediate( $index )->raw();
};
if ($@) {
die $@ if ( ref($@) ne 'ZOOM::Exception' );
$self->_set_error_from_zoom( $args, $@ );
return;
} else {
return $record;
}
}
=cut
sub search_handler {
# Called when search is first sent.
my ( $self, $args ) = @_;
my $database = $args->{DATABASES}->[0];
if ( $database !~ /^(biblios|authorities)$/ ) {
$self->_set_error( $args, ERR_DB_DOES_NOT_EXIST, 'No such database' );
if ( $database ne $Koha::SearchEngine::BIBLIOS_INDEX && $database ne $Koha::SearchEngine::AUTHORITIES_INDEX ) {
$self->set_error( $args, $self->ERR_DB_DOES_NOT_EXIST, 'No such database' );
return;
}
my $query = $args->{QUERY};
$self->_log_info("received search for '$query', (RS $args->{SETNAME})");
my ( $connection, $results, $num_hits ) = $self->_start_search( $args );
return unless $connection;
$args->{HITS} = $num_hits;
my $resultset = $self->{resultsets}->{ $args->{SETNAME} } = {
database => $database,
connection => $connection,
results => $results,
query => $args->{QUERY},
hits => $args->{HITS},
};
}
$self->log_info("received search for '$query', (RS $args->{SETNAME})");
sub present_handler {
# Called when a set of records is requested.
my ( $self, $args ) = @_;
my ($resultset, $hits) = $self->start_search( $args, $self->{server}->{num_to_prefetch} );
return unless $resultset;
$self->_log_debug("received present for $args->{SETNAME}, $args->{START}+$args->{NUMBER}");
$args->{HITS} = $hits;
$self->{resultsets}->{ $args->{SETNAME} } = $resultset;
}
my $resultset = $self->{resultsets}->{ $args->{SETNAME} };
# The offset comes across 1-indexed.
my $offset = $args->{START} - 1;
=head3 fetch_handler
return unless $self->_check_fetch( $resultset, $args, $offset, $args->{NUMBER} );
Callback that is called when records are requested
}
Calls C<fetch_record> for backend-specific retrieval logic
=cut
sub fetch_handler {
# Called when a given record is requested.
my ( $self, $args ) = @_;
my $session = $args->{HANDLE};
$self->log_debug("received fetch for RS $args->{SETNAME}, record $args->{OFFSET}");
my $server = $self->{server};
$self->_log_debug("received fetch for $args->{SETNAME}, record $args->{OFFSET}");
my $form_oid = $args->{REQ_FORM} // '';
my $composition = $args->{COMP} // '';
$self->_log_debug(" form OID $form_oid, composition $composition");
$self->log_debug(" form OID '$form_oid', composition '$composition'");
my $resultset = $session->{resultsets}->{ $args->{SETNAME} };
my $resultset = $self->{resultsets}->{ $args->{SETNAME} };
# The offset comes across 1-indexed.
my $offset = $args->{OFFSET} - 1;
return unless $self->_check_fetch( $resultset, $args, $offset, 1 );
return unless $self->check_fetch( $resultset, $args, $offset, 1 );
$args->{LAST} = 1 if ( $offset == $resultset->{hits} - 1 );
my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} );
my $record = $self->fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} );
return unless $record;
# Note that new_record_from_zebra is badly named and works also with Elasticsearch
$record = C4::Search::new_record_from_zebra(
$resultset->{database} eq 'biblios' ? 'biblioserver' : 'authorityserver',
$record
@ -254,16 +172,84 @@ sub fetch_handler {
}
}
if ( $form_oid eq MARCXML_OID && $composition eq 'marcxml' ) {
if ( $form_oid eq $self->MARCXML_OID && $composition eq 'marcxml' ) {
$args->{RECORD} = $record->as_xml_record();
} elsif ( ( $form_oid eq USMARC_OID || $form_oid eq UNIMARC_OID ) && ( !$composition || $composition eq 'F' ) ) {
} elsif ( ( $form_oid eq $self->USMARC_OID || $form_oid eq $self->UNIMARC_OID ) && ( !$composition || $composition eq 'F' ) ) {
$args->{RECORD} = $record->as_usmarc();
} else {
$self->_set_error( $args, ERR_SYNTAX_UNSUPPORTED, "Unsupported syntax/composition $form_oid/$composition" );
$self->set_error( $args, $self->ERR_SYNTAX_UNSUPPORTED, "Unsupported syntax/composition $form_oid/$composition" );
return;
}
}
=head3 close_handler
Callback that is called when a session is terminated
=cut
sub close_handler {
my ( $self, $args ) = @_;
# Override in a child class to add functionality
}
=head3 start_search
my ($resultset, $hits) = $self->_start_search( $args, $self->{server}->{num_to_prefetch} );
A backend-specific method for starting a new search
=cut
sub start_search {
die('Abstract method');
}
=head3 check_fetch
$self->check_fetch($resultset, $args, $offset, $num_records);
Check that the fetch request parameters are within bounds of the result set.
=cut
sub check_fetch {
my ( $self, $resultset, $args, $offset, $num_records ) = @_;
if ( !defined( $resultset ) ) {
$self->set_error( $args, ERR_NO_SUCH_RESULTSET, 'No such resultset' );
return 0;
}
if ( $offset < 0 || $offset + $num_records > $resultset->{hits} ) {
$self->set_error( $args, ERR_PRESENT_OUT_OF_RANGE, 'Present request out of range' );
return 0;
}
return 1;
}
=head3 fetch_record
my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} );
A backend-specific method for fetching a record
=cut
sub fetch_record {
die('Abstract method');
}
=head3 add_item_status
$self->add_item_status( $field );
Add item status to the given field
=cut
sub add_item_status {
my ( $self, $field ) = @_;
@ -318,12 +304,60 @@ sub add_item_status {
}
}
sub close_handler {
my ( $self, $args ) = @_;
foreach my $resultset ( values %{ $self->{resultsets} } ) {
$resultset->{results}->destroy();
}
=head3 log_debug
$self->log_debug('Message');
Output a debug message
=cut
sub log_debug {
my ( $self, $msg ) = @_;
$self->{logger}->debug("[$self->{peer}] $msg");
}
=head3 log_info
$self->log_info('Message');
Output an info message
=cut
sub log_info {
my ( $self, $msg ) = @_;
$self->{logger}->info("[$self->{peer}] $msg");
}
=head3 log_error
$self->log_error('Message');
Output an error message
=cut
sub log_error {
my ( $self, $msg ) = @_;
$self->{logger}->error("[$self->{peer}] $msg");
}
=head3 set_error
$self->set_error($args, $self->ERR_SEARCH_FAILED, 'Backend connection failed' );
Set and log an error code and diagnostic message to be returned to the client
=cut
sub set_error {
my ( $self, $args, $code, $msg ) = @_;
( $args->{ERR_CODE}, $args->{ERR_STR} ) = ( $code, $msg );
$self->log_error(" returning error $code: $msg");
}
1;

163
Koha/Z3950Responder/ZebraSession.pm

@ -0,0 +1,163 @@
#!/usr/bin/perl
package Koha::Z3950Responder::ZebraSession;
# Copyright ByWater Solutions 2016
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use Modern::Perl;
use base qw( Koha::Z3950Responder::Session );
use Koha::Logger;
use ZOOM;
=head1 NAME
Koha::Z3950Responder::ZebraSession
=head1 SYNOPSIS
Zebra-specific session class that uses C<Koha::Session> as the base class.
=head1 FUNCTIONS
=head2 INSTANCE METHODS
=head3 start_search
my ($resultset, $hits) = $self->_start_search( $args, $self->{server}->{num_to_prefetch} );
Connect to Zebra and do the search
=cut
sub start_search {
my ( $self, $args, $num_to_prefetch, $in_retry ) = @_;
my $database = $args->{DATABASES}->[0];
my ( $connection, $results );
eval {
$connection = C4::Context->Zconn(
# We're depending on the caller to have done some validation.
$database eq 'biblios' ? 'biblioserver' : 'authorityserver',
0 # No, no async, doesn't really help much for single-server searching
);
$results = $connection->search_pqf( $args->{QUERY} );
$self->log_debug(' retry successful') if ($in_retry);
};
if ($@) {
die $@ if ( ref($@) ne 'ZOOM::Exception' );
if ( $@->diagset() eq 'ZOOM' && $@->code() == 10004 && !$in_retry ) {
$self->log_debug(' upstream server lost connection, retrying');
return $self->_start_search( $args, $num_to_prefetch, 1 );
}
$self->_set_error_from_zoom( $args, $@ );
$connection = undef;
}
my $hits = $results ? $results->size() : -1;
my $resultset = {
database => $database,
connection => $connection,
results => $results,
query => $args->{QUERY},
hits => $hits
};
return ( $resultset, $hits );
}
=head3 fetch_record
my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} );
Fetch a record from Zebra. Caches records in session to avoid too many fetches.
=cut
sub fetch_record {
my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_;
my $record;
eval {
if ( !$resultset->{results}->record_immediate( $index ) ) {
my $start = $num_to_prefetch ? int( $index / $num_to_prefetch ) * $num_to_prefetch : $index;
if ( $start + $num_to_prefetch >= $resultset->{results}->size() ) {
$num_to_prefetch = $resultset->{results}->size() - $start;
}
$self->log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $start");
$resultset->{results}->records( $start, $num_to_prefetch, 0 );
}
$record = $resultset->{results}->record_immediate( $index )->raw();
};
if ($@) {
die $@ if ( ref($@) ne 'ZOOM::Exception' );
$self->_set_error_from_zoom( $args, $@ );
return;
} else {
return $record;
}
}
=head3 close_handler
Callback that is called when a session is terminated
=cut
sub close_handler {
my ( $self, $args ) = @_;
foreach my $resultset ( values %{ $self->{resultsets} } ) {
$resultset->{results}->destroy();
}
}
=head3 _set_error_from_zoom
$self->_set_error_from_zoom( $args, $@ );
Log and set error code and diagnostic message from a ZOOM exception
=cut
sub _set_error_from_zoom {
my ( $self, $args, $exception ) = @_;
$self->set_error( $args, $self->ERR_TEMPORARY_ERROR, 'Cannot connect to upstream server' );
$self->log_error(
"Zebra upstream error: " .
$exception->message() . " (" .
$exception->code() . ") " .
( $exception->addinfo() // '' ) . " " .
$exception->diagset()
);
}
1;

46
etc/z3950/attribute_mappings.yaml

@ -0,0 +1,46 @@
---
# Mappings from Z39.50 USE attributes to Koha search fields
authorities:
# BIB-1 use attributes to index fields
use:
1: Personal-name
2: Heading
3: Heading
9: LC-card-number
12: Local-number
default: _all
biblios:
# BIB-1 use attributes to index fields
use:
1: author
2: author
3: author
4: title
5: se
7: isbn
8: issn
9: LC-card-number
10: bnb-card-number
11: bgf-number
12: Local-number
20: local-classification
21: subject
30:
- acqdate
- copydate
- pubdate
31: pubdate
32: acqdate
52: control-number
1003: author
1007: identifier-standard
1011: date-entered-on-file
1012: date-time-last-modified
1018: publisher
1019: record-source
1021: bib-level
1028: barcode
1031: itype
1033: Host-Item-Number
1045: control-number
default: _all

12
etc/z3950/config.xml

@ -0,0 +1,12 @@
<yazgfs>
<server>
<cql2rpn>pqf.properties</cql2rpn>
<explain xmlns="http://explain.z3950.org/dtd/2.0/">
<retrievalinfo>
<retrieval syntax="usmarc" name="marc21"/>
<retrieval syntax="unimarc" name="unimarc"/>
<retrieval syntax="xml" name="marcxml" identifier="info:srw/schema/1/marcxml-v1.1"/>
</retrievalinfo>
</explain>
</server>
</yazgfs>

162
etc/z3950/pqf.properties

@ -0,0 +1,162 @@
#
# Propeties file to drive org.z3950.zing.cql.CQLNode's toPQF()
# back-end and the YAZ CQL-to-PQF converter. This specifies the
# interpretation of various CQL indexes, relations, etc. in terms
# of Type-1 query attributes.
#
# This configuration file generates queries using BIB-1 attributes.
# See http://www.loc.gov/z3950/agency/zing/cql/dc-indexes.html
# for the Maintenance Agency's work-in-progress mapping of Dublin Core
# indexes to Attribute Architecture (util, XD and BIB-2)
# attributes.
# Identifiers for prefixes used in this file. (index.*)
set.cql = info:srw/cql-context-set/1/cql-v1.1
set.rec = info:srw/cql-context-set/2/rec-1.0
set.dc = info:srw/cql-context-set/1/dc-v1.1
set.bath = http://zing.z3950.org/cql/bath/2.0/
# default set (in query)
set = info:srw/cql-context-set/1/dc-v1.1
# The default access point and result-set references
index.cql.serverChoice = 1=1016
# srw.serverChoice is deprecated in favour of cql.serverChoice
# BIB-1 "any"
index.rec.id = 1=12
index.dc.identifier = 1=1007
index.dc.title = 1=4
index.dc.subject = 1=21
index.dc.creator = 1=1003
index.dc.author = 1=1003
index.dc.itemtype = 1=1031
index.dc.barcode = 1=1028
index.dc.branch = 1=1033
index.dc.isbn = 1=7
index.dc.issn = 1=8
index.dc.any = 1=1016
index.dc.note = 1=63
# personal name experimental
index.dc.pname = 1=1
### Unofficial synonym for "creator"
index.dc.editor = 1=1020
index.dc.publisher = 1=1018
index.dc.description = 1=62
# "abstract"
index.dc.date = 1=30
index.dc.resourceType = 1=1031
# guesswork: "Material-type"
index.dc.format = 1=1034
# guesswork: "Content-type"
index.dc.resourceIdentifier = 1=12
# "Local number"
#index.dc.source = 1=1019
# "Record-source"
index.dc.language = 1=54
# "Code--language"
index.dc.Place-publication = 1=59
# "Place-publication"
#index.dc.relation = 1=?
### No idea how to represent this
#index.dc.coverage = 1=?
### No idea how to represent this
#index.dc.rights = 1=?
### No idea how to represent this
# Relation attributes are selected according to the CQL relation by
# looking up the "relation.<relation>" property:
#
relation.< = 2=1
relation.le = 2=2
relation.eq = 2=3
relation.exact = 2=3
relation.ge = 2=4
relation.> = 2=5
relation.<> = 2=6
### These two are not really right:
relation.all = 2=3
relation.any = 2=3
# BIB-1 doesn't have a server choice relation, so we just make the
# choice here, and use equality (which is clearly correct).
relation.scr = 2=3
# Relation modifiers.
#
relationModifier.relevant = 2=102
relationModifier.fuzzy = 5=103
### 100 is "phonetic", which is not quite the same thing
relationModifier.stem = 2=101
relationModifier.phonetic = 2=100
# Position attributes may be specified for anchored terms (those
# beginning with "^", which is stripped) and unanchored (those not
# beginning with "^"). This may change when we get a BIB-1 truncation
# attribute that says "do what CQL does".
#
position.first = 3=1 6=1
# "first in field"
position.any = 3=3 6=1
# "any position in field"
position.last = 3=4 6=1
# not a standard BIB-1 attribute
position.firstAndLast = 3=3 6=3
# search term is anchored to be complete field
# Structure attributes may be specified for individual relations; a
# default structure attribute my be specified by the pseudo-relation
# "*", to be used whenever a relation not listed here occurs.
#
structure.exact = 4=108
# string
structure.all = 4=2
structure.any = 4=2
structure.* = 4=1
# phrase
# Truncation attributes used to implement CQL wildcard patterns. The
# simpler forms, left, right- and both-truncation will be used for the
# simplest patterns, so that we produce PQF queries that conform more
# closely to the Bath Profile. However, when a more complex pattern
# such as "foo*bar" is used, we fall back on Z39.58-style masking.
#
truncation.right = 5=1
truncation.left = 5=2
truncation.both = 5=3
truncation.none = 5=100
truncation.z3958 = 5=104
# Finally, any additional attributes that should always be included
# with each term can be specified in the "always" property.
#
always = 6=1
# 6=1: completeness = incomplete subfield
# Bath Profile support, added Thu Dec 18 13:06:20 GMT 2003
# See the Bath Profile for SRW at
# http://zing.z3950.org/cql/bath.html
# including the Bath Context Set defined within that document.
#
# In this file, we only map index-names to BIB-1 use attributes, doing
# so in accordance with the specifications of the Z39.50 Bath Profile,
# and leaving the relations, wildcards, etc. to fend for themselves.
index.bath.keyTitle = 1=33
index.bath.possessingInstitution = 1=1044
index.bath.name = 1=1002
index.bath.personalName = 1=1
index.bath.corporateName = 1=2
index.bath.conferenceName = 1=3
index.bath.uniformTitle = 1=6
index.bath.isbn = 1=7
index.bath.issn = 1=8
index.bath.geographicName = 1=58
index.bath.notes = 1=63
index.bath.topicalSubject = 1=1079
index.bath.genreForm = 1=1075

28
misc/z3950_responder.pl

@ -20,10 +20,11 @@
use Modern::Perl;
use Carp;
use File::Basename;
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use C4::Context;
use Koha::Config;
use Koha::Z3950Responder;
=head1 SYNOPSIS
@ -31,7 +32,7 @@ use Koha::Z3950Responder;
z3950_responder.pl [-h|--help] [--man] [-a <pdufile>] [-v <loglevel>] [-l <logfile>] [-u <user>]
[-c <config>] [-t <minutes>] [-k <kilobytes>] [-d <daemon>] [-p <pidfile>]
[-C certfile] [-zKiDST1] [-m <time-format>] [-w <directory>] [--debug]
[--add-item-status=SUBFIELD] [--prefetch=NUM_RECORDS]
[--add-item-status=SUBFIELD] [--prefetch=NUM_RECORDS] [--config-dir=<directory>]
[<listener-addr>... ]
=head1 OPTIONS
@ -51,7 +52,7 @@ Displays manual page and exits.
=item B<--debug>
Turns on debug logging to the screen, and turns on single-process mode.
Turns on debug logging to the screen and the single-process mode.
=item B<--add-item-status=SUBFIELD>
@ -64,7 +65,12 @@ status string.
=item B<--prefetch=NUM_RECORDS>
Number of records to prefetch from Zebra. Defaults to 20.
Number of records to prefetch. Defaults to 20.
=item B<--config-dir=directory>
Directory where to find configuration files required for proper operation. Defaults to z3950 under
the Koha config directory.
=back
@ -101,6 +107,8 @@ my $debug = 0;
my $help;
my $man;
my $prefetch = 20;
my $config_dir = '';
my @yaz_options;
sub add_yaz_option {
@ -122,6 +130,7 @@ GetOptions(
'--add-item-status=s' => \$add_item_status_subfield,
'--add-status-multi-subfield' => \$add_status_multi_subfield,
'--prefetch=i' => \$prefetch,
'--config-dir=s' => \$config_dir,
# Pass through YAZ options.
'a=s' => \&add_yaz_option,
'v=s' => \&add_yaz_option,
@ -152,15 +161,22 @@ if (!@ARGV || $ARGV[-1] =~ /^-/) {
push(@ARGV, '@:2100');
}
# Create and start the server.
# If config_dir is not defined, default to z3950 under the Koha config directory
if (!$config_dir) {
(undef, $config_dir) = fileparse(Koha::Config->guess_koha_conf);
$config_dir .= 'z3950/';
} else {
$config_dir .= '/' if ($config_dir !~ /\/$/);
}
die "This tool only works with Zebra" if C4::Context->preference('SearchEngine') ne 'Zebra';
# Create and start the server.
my $z = Koha::Z3950Responder->new( {
add_item_status_subfield => $add_item_status_subfield,
add_status_multi_subfield => $add_status_multi_subfield,
debug => $debug,
num_to_prefetch => $prefetch,
config_dir => $config_dir,
yaz_options => [ @yaz_options, @ARGV ],
} );

Loading…
Cancel
Save