From ac28d8043d16342ed198cceca9168498ced37fd6 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Thu, 22 Nov 2018 11:20:01 +0200 Subject: [PATCH] Bug 13937: Add support for SRU and Elasticsearch Splits Session to GenericSession and ZebraSession where Generic supports any search backend via the SearchEngine classes and Zebra maintains the direct channel to the Zebra server. Adds config files required for mapping BIB-1 attributes to Koha search fields and SRU indexes to BIB-1 attributes. Adds PODs. Sponsored-by: National Library of Finland Signed-off-by: Stefan Berndtsson Signed-off-by: Martin Renvoize --- Koha/Z3950Responder.pm | 110 ++++++-- Koha/Z3950Responder/GenericSession.pm | 113 +++++++++ Koha/Z3950Responder/RPN.pm | 117 +++++++++ Koha/Z3950Responder/Session.pm | 344 ++++++++++++++------------ Koha/Z3950Responder/ZebraSession.pm | 163 ++++++++++++ etc/z3950/attribute_mappings.yaml | 46 ++++ etc/z3950/config.xml | 12 + etc/z3950/pqf.properties | 162 ++++++++++++ misc/z3950_responder.pl | 28 ++- 9 files changed, 916 insertions(+), 179 deletions(-) create mode 100644 Koha/Z3950Responder/GenericSession.pm create mode 100644 Koha/Z3950Responder/RPN.pm create mode 100644 Koha/Z3950Responder/ZebraSession.pm create mode 100644 etc/z3950/attribute_mappings.yaml create mode 100644 etc/z3950/config.xml create mode 100644 etc/z3950/pqf.properties diff --git a/Koha/Z3950Responder.pm b/Koha/Z3950Responder.pm index c7a7bf2973..93d0ac2e33 100644 --- a/Koha/Z3950Responder.pm +++ b/Koha/Z3950Responder.pm @@ -24,11 +24,44 @@ use Modern::Perl; use C4::Biblio qw( GetMarcFromKohaField ); use C4::Koha qw( GetAuthorisedValues ); -use Koha; -use Koha::Z3950Responder::Session; - use Net::Z3950::SimpleServer; +=head1 NAME + +Koha::Z3950Responder - Main class for interfacing with Net::Z3950::SimpleServer + +=head1 SYNOPSIS + + use Koha::Z3950Responder; + + my $z = Koha::Z3950Responder->new( { + add_item_status_subfield => 1, + add_status_multi_subfield => 1, + debug => 0, + num_to_prefetch => 20, + config_dir => '/home/koha/etc', + yaz_options => [ ], + } ); + + $z->start(); + +=head1 DESCRIPTION + +A daemon class that interfaces with Net::Z3950::SimpleServer to provider Z39.50/SRU +service. Uses a Session class for the actual functionality. + +=head1 METHODS + +=head2 INSTANCE METHODS + +=head3 new + + $self->new({ + add_item_status_subfield => 1 + }); + +=cut + sub new { my ( $class, $config ) = @_; @@ -66,10 +99,15 @@ sub new { unshift @{ $self->{yaz_options} }, '-v', 'none,fatal'; } + # Set main config for SRU support + unshift @{ $self->{yaz_options} }, '-f', $self->{config_dir} . 'config.xml' if $self->{config_dir}; + + # Set num to prefetch if not passed + $self->{num_to_prefetch} //= 20; + $self->{server} = Net::Z3950::SimpleServer->new( INIT => sub { $self->init_handler(@_) }, SEARCH => sub { $self->search_handler(@_) }, - PRESENT => sub { $self->present_handler(@_) }, FETCH => sub { $self->fetch_handler(@_) }, CLOSE => sub { $self->close_handler(@_) }, ); @@ -77,25 +115,52 @@ sub new { return bless( $self, $class ); } +=head3 start + + $z->start(); + +Start the daemon and begin serving requests. Does not return unless initialization fails or a +fatal error occurs. + +=cut + sub start { my ( $self ) = @_; $self->{server}->launch_server( 'Koha::Z3950Responder', @{ $self->{yaz_options} } ) } -# The rest of these methods are SimpleServer callbacks bound to this Z3950Responder object. It's -# worth noting that these callbacks don't return anything; they both receive and return data in the -# $args hashref. +=head2 CALLBACKS + +These methods are SimpleServer callbacks bound to this Z3950Responder object. +It's worth noting that these callbacks don't return anything; they both +receive and return data in the $args hashref. + +=head3 init_handler + +Callback that is called when a new connection is initialized + +=cut sub init_handler { # Called when the client first connects. my ( $self, $args ) = @_; # This holds all of the per-connection state. - my $session = Koha::Z3950Responder::Session->new({ - server => $self, - peer => $args->{PEER_NAME}, - }); + my $session; + if (C4::Context->preference('SearchEngine') eq 'Zebra') { + use Koha::Z3950Responder::ZebraSession; + $session = Koha::Z3950Responder::ZebraSession->new({ + server => $self, + peer => $args->{PEER_NAME}, + }); + } else { + use Koha::Z3950Responder::GenericSession; + $session = Koha::Z3950Responder::GenericSession->new({ + server => $self, + peer => $args->{PEER_NAME} + }); + } $args->{HANDLE} = $session; @@ -103,27 +168,36 @@ sub init_handler { $args->{IMP_VER} = Koha::version; } +=head3 search_handler + +Callback that is called when a new search is performed + +=cut + sub search_handler { - # Called when search is first sent. my ( $self, $args ) = @_; $args->{HANDLE}->search_handler($args); } -sub present_handler { - # Called when a set of records is requested. - my ( $self, $args ) = @_; +=head3 fetch_handler - $args->{HANDLE}->present_handler($args); -} +Callback that is called when records are requested + +=cut sub fetch_handler { - # Called when a given record is requested. my ( $self, $args ) = @_; $args->{HANDLE}->fetch_handler( $args ); } +=head3 close_handler + +Callback that is called when a session is terminated + +=cut + sub close_handler { my ( $self, $args ) = @_; diff --git a/Koha/Z3950Responder/GenericSession.pm b/Koha/Z3950Responder/GenericSession.pm new file mode 100644 index 0000000000..0daf2258d4 --- /dev/null +++ b/Koha/Z3950Responder/GenericSession.pm @@ -0,0 +1,113 @@ +#!/usr/bin/perl + +package Koha::Z3950Responder::GenericSession; + +# Copyright The National Library of Finland 2018 +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +use Modern::Perl; + +use base qw( Koha::Z3950Responder::Session ); + +use Koha::Logger; +use Koha::SearchEngine::Search; +use Koha::SearchEngine::QueryBuilder; +use Koha::Z3950Responder::RPN; + +=head1 NAME + +Koha::Z3950Responder::genericSession + +=head1 SYNOPSIS + +Backend-agnostic session class that uses C as the base class. Utilizes +C for the actual functionality. + +=head2 INSTANCE METHODS + +=head3 start_search + + my ($resultset, $hits) = $self->start_search( $args, $self->{server}->{num_to_prefetch} ); + +Perform a search using C's QueryBuilder and Search. + +=cut + +sub start_search { + my ( $self, $args, $num_to_prefetch ) = @_; + + if (!defined $self->{'attribute_mappings'}) { + require YAML; + $self->{'attribute_mappings'} = YAML::LoadFile($self->{server}->{config_dir} . 'attribute_mappings.yaml'); + } + + my $database = $args->{DATABASES}->[0]; + my $builder = Koha::SearchEngine::QueryBuilder->new({ index => $database }); + my $searcher = Koha::SearchEngine::Search->new({ index => $database }); + + my $built_query; + my $query = $args->{RPN}->{'query'}->to_koha($self->{'attribute_mappings'}->{$database}); + $self->log_debug(" parsed search: $query"); + my @operands = $query; + (undef, $built_query) = $builder->build_query_compat( undef, \@operands, undef, undef, undef, 0); + + my ($error, $marcresults, $hits ) = $searcher->simple_search_compat($built_query, 0, $num_to_prefetch); + if (defined $error) { + $self->set_error($args, $self->ERR_SEARCH_FAILED, 'Search failed'); + return; + } + + my $resultset = { + query => $built_query, + database => $database, + cached_offset => 0, + cached_results => $marcresults, + hits => $hits + }; + + return ($resultset, $hits); +} + +=head3 fetch_record + + my $record = $self->fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} ); + +Fetch a record from SearchEngine. Caches records in session to avoid too many fetches. + +=cut + +sub fetch_record { + my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_; + + # Fetch more records if necessary + my $offset = $args->{OFFSET} - 1; + if ($offset < $resultset->{cached_offset} || $offset >= $resultset->{cached_offset} + $num_to_prefetch) { + $self->log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $offset"); + my $searcher = Koha::SearchEngine::Search->new({ index => $resultset->{'database'} }); + my ($error, $marcresults, $num_hits ) = $searcher->simple_search_compat($resultset->{'query'}, $offset, $num_to_prefetch); + if (defined $error) { + $self->set_error($args, $self->ERR_TEMPORARY_ERROR, 'Fetch failed'); + return; + } + + $resultset->{cached_offset} = $offset; + $resultset->{cached_results} = $marcresults; + } + return $resultset->{cached_results}[$offset - $resultset->{cached_offset}]; +} + +1; diff --git a/Koha/Z3950Responder/RPN.pm b/Koha/Z3950Responder/RPN.pm new file mode 100644 index 0000000000..3a1158b9d4 --- /dev/null +++ b/Koha/Z3950Responder/RPN.pm @@ -0,0 +1,117 @@ +#!/usr/bin/perl + +# Copyright The National Library of Finland 2018 +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under thes +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +use Modern::Perl; + +=head1 NAME + +Koha::Z3950Responder::RPN + +=head1 SYNOPSIS + +Overrides for the C classes adding a C method that +converts the query to a syntax that C understands. + +=head1 DESCRIPTION + +The method used here is described in C of +C. + +=cut + +package Net::Z3950::RPN::Term; +sub to_koha { + my ($self, $mappings) = @_; + + my $attrs = $self->{'attributes'}; + my $fields = $mappings->{use}{default} // '_all'; + my $split = 0; + my $quote = ''; + my $prefix = ''; + my $suffix = ''; + my $term = $self->{'term'}; + + if ($attrs) { + foreach my $attr (@$attrs) { + if ($attr->{'attributeType'} == 1) { # use + my $use = $attr->{'attributeValue'}; + $fields = $mappings->{use}{$use} if defined $mappings->{use}{$use}; + } elsif ($attr->{'attributeType'} == 4) { # structure + $split = 1 if ($attr->{'attributeValue'} == 2); + $quote = '"' if ($attr->{'attributeValue'} == 1); + } elsif ($attr->{'attributeType'} == 5) { # truncation + my $truncation = $attr->{'attributeValue'}; + $prefix = '*' if ($truncation == 2 || $truncation == 3); + $suffix = '*' if ($truncation == 1 || $truncation == 3); + } + } + } + + $fields = [$fields] unless ref($fields) eq 'ARRAY'; + + if ($split) { + my @terms; + foreach my $word (split(/\s/, $term)) { + $word =~ s/^[\,\.;:\\\/\"\'\-\=]+//g; + $word =~ s/[\,\.;:\\\/\"\'\-\=]+$//g; + next if (!$word); + my @words; + foreach my $field (@{$fields}) { + push(@words, "$field:($prefix$word$suffix)"); + } + push (@terms, join(' OR ', @words)); + } + return '(' . join(' AND ', @terms) . ')'; + } + + my @terms; + foreach my $field (@{$fields}) { + push(@terms, "$field:($prefix$term$suffix)"); + } + return '(' . join(' OR ', @terms) . ')'; +} + +package Net::Z3950::RPN::And; +sub to_koha +{ + my ($self, $mappings) = @_; + + return '(' . $self->[0]->to_koha($mappings) . ' AND ' . + $self->[1]->to_koha($mappings) . ')'; +} + +package Net::Z3950::RPN::Or; +sub to_koha +{ + my ($self, $mappings) = @_; + + return '(' . $self->[0]->to_koha($mappings) . ' OR ' . + $self->[1]->to_koha($mappings) . ')'; +} + +package Net::Z3950::RPN::AndNot; +sub to_koha +{ + my ($self, $mappings) = @_; + + return '(' . $self->[0]->to_koha($mappings) . ' NOT ' . + $self->[1]->to_koha($mappings) . ')'; +} + +1; diff --git a/Koha/Z3950Responder/Session.pm b/Koha/Z3950Responder/Session.pm index 338056bb15..c64cc0c5a2 100644 --- a/Koha/Z3950Responder/Session.pm +++ b/Koha/Z3950Responder/Session.pm @@ -28,7 +28,28 @@ use C4::Reserves qw( GetReserveStatus ); use C4::Search qw(); use Koha::Logger; -use ZOOM; +=head1 NAME + +Koha::Z3950Responder::Session + +=head1 SYNOPSIS + +An abstract class where backend-specific session modules are derived from. +Z3950Responder creates one of the child classes depending on the SearchEngine +preference. + +=head1 DESCRIPTION + +This class contains common functions for handling searching for and fetching +of records. It can optionally add item status information to the returned +records. The backend-specific abstract methods need to be implemented in a +child class. + +=head2 CONSTANTS + +OIDs and diagnostic codes used in Z39.50 + +=cut use constant { UNIMARC_OID => '1.2.840.10003.5.1', @@ -41,10 +62,26 @@ use constant { ERR_PRESENT_OUT_OF_RANGE => 13, ERR_RECORD_TOO_LARGE => 16, ERR_NO_SUCH_RESULTSET => 30, - ERR_SYNTAX_UNSUPPORTED => 230, + ERR_SEARCH_FAILED => 125, + ERR_SYNTAX_UNSUPPORTED => 239, ERR_DB_DOES_NOT_EXIST => 235, }; +=head1 FUNCTIONS + +=head2 INSTANCE METHODS + +=head3 new + + my $session = $self->new({ + server => $z3950responder, + peer => 'PEER NAME' + }); + +Instantiate a Session + +=cut + sub new { my ( $class, $args ) = @_; @@ -58,189 +95,70 @@ sub new { $self->{logger}->debug_to_screen(); } - $self->_log_info("connected"); + $self->log_info('connected'); return $self; } -sub _log_debug { - my ( $self, $msg ) = @_; - $self->{logger}->debug("[$self->{peer}] $msg"); -} - -sub _log_info { - my ( $self, $msg ) = @_; - $self->{logger}->info("[$self->{peer}] $msg"); -} - -sub _log_error { - my ( $self, $msg ) = @_; - $self->{logger}->error("[$self->{peer}] $msg"); -} - -sub _set_error { - my ( $self, $args, $code, $msg ) = @_; - - ( $args->{ERR_CODE}, $args->{ERR_STR} ) = ( $code, $msg ); - - $self->_log_error(" returning error $code: $msg"); -} - -sub _set_error_from_zoom { - my ( $self, $args, $exception ) = @_; - - $self->_set_error( $args, ERR_TEMPORARY_ERROR, 'Cannot connect to upstream server' ); - $self->_log_error( - "Zebra upstream error: " . - $exception->message() . " (" . - $exception->code() . ") " . - ( $exception->addinfo() // '' ) . " " . - $exception->diagset() - ); -} - -# This code originally went through C4::Search::getRecords, but had to use so many escape hatches -# that it was easier to directly connect to Zebra. -sub _start_search { - my ( $self, $args, $in_retry ) = @_; - - my $database = $args->{DATABASES}->[0]; - my ( $connection, $results ); +=head3 search_handler - eval { - $connection = C4::Context->Zconn( - # We're depending on the caller to have done some validation. - $database eq 'biblios' ? 'biblioserver' : 'authorityserver', - 0 # No, no async, doesn't really help much for single-server searching - ); + Callback that is called when a new search is performed - $results = $connection->search_pqf( $args->{QUERY} ); +Calls C for backend-specific retrieval logic - $self->_log_debug(' retry successful') if ($in_retry); - }; - if ($@) { - die $@ if ( ref($@) ne 'ZOOM::Exception' ); - - if ( $@->diagset() eq 'ZOOM' && $@->code() == 10004 && !$in_retry ) { - $self->_log_debug(' upstream server lost connection, retrying'); - return $self->_start_search( $args, 1 ); - } - - $self->_set_error_from_zoom( $args, $@ ); - $connection = undef; - } - - return ( $connection, $results, $results ? $results->size() : -1 ); -} - -sub _check_fetch { - my ( $self, $resultset, $args, $offset, $num_records ) = @_; - - if ( !defined( $resultset ) ) { - $self->_set_error( $args, ERR_NO_SUCH_RESULTSET, 'No such resultset' ); - return 0; - } - - if ( $offset < 0 || $offset + $num_records > $resultset->{hits} ) { - $self->_set_error( $args, ERR_PRESENT_OUT_OF_RANGE, 'Present request out of range' ); - return 0; - } - - return 1; -} - -sub _fetch_record { - my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_; - - my $record; - - eval { - if ( !$resultset->{results}->record_immediate( $index ) ) { - my $start = $num_to_prefetch ? int( $index / $num_to_prefetch ) * $num_to_prefetch : $index; - - if ( $start + $num_to_prefetch >= $resultset->{results}->size() ) { - $num_to_prefetch = $resultset->{results}->size() - $start; - } - - $self->_log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $start"); - - $resultset->{results}->records( $start, $num_to_prefetch, 0 ); - } - - $record = $resultset->{results}->record_immediate( $index )->raw(); - }; - if ($@) { - die $@ if ( ref($@) ne 'ZOOM::Exception' ); - $self->_set_error_from_zoom( $args, $@ ); - return; - } else { - return $record; - } -} +=cut sub search_handler { - # Called when search is first sent. my ( $self, $args ) = @_; my $database = $args->{DATABASES}->[0]; - if ( $database !~ /^(biblios|authorities)$/ ) { - $self->_set_error( $args, ERR_DB_DOES_NOT_EXIST, 'No such database' ); + if ( $database ne $Koha::SearchEngine::BIBLIOS_INDEX && $database ne $Koha::SearchEngine::AUTHORITIES_INDEX ) { + $self->set_error( $args, $self->ERR_DB_DOES_NOT_EXIST, 'No such database' ); return; } my $query = $args->{QUERY}; - $self->_log_info("received search for '$query', (RS $args->{SETNAME})"); - - my ( $connection, $results, $num_hits ) = $self->_start_search( $args ); - return unless $connection; - - $args->{HITS} = $num_hits; - my $resultset = $self->{resultsets}->{ $args->{SETNAME} } = { - database => $database, - connection => $connection, - results => $results, - query => $args->{QUERY}, - hits => $args->{HITS}, - }; -} + $self->log_info("received search for '$query', (RS $args->{SETNAME})"); -sub present_handler { - # Called when a set of records is requested. - my ( $self, $args ) = @_; + my ($resultset, $hits) = $self->start_search( $args, $self->{server}->{num_to_prefetch} ); + return unless $resultset; - $self->_log_debug("received present for $args->{SETNAME}, $args->{START}+$args->{NUMBER}"); + $args->{HITS} = $hits; + $self->{resultsets}->{ $args->{SETNAME} } = $resultset; +} - my $resultset = $self->{resultsets}->{ $args->{SETNAME} }; - # The offset comes across 1-indexed. - my $offset = $args->{START} - 1; +=head3 fetch_handler - return unless $self->_check_fetch( $resultset, $args, $offset, $args->{NUMBER} ); + Callback that is called when records are requested -} +Calls C for backend-specific retrieval logic + +=cut sub fetch_handler { - # Called when a given record is requested. my ( $self, $args ) = @_; - my $session = $args->{HANDLE}; + + $self->log_debug("received fetch for RS $args->{SETNAME}, record $args->{OFFSET}"); + my $server = $self->{server}; - $self->_log_debug("received fetch for $args->{SETNAME}, record $args->{OFFSET}"); my $form_oid = $args->{REQ_FORM} // ''; my $composition = $args->{COMP} // ''; - $self->_log_debug(" form OID $form_oid, composition $composition"); + $self->log_debug(" form OID '$form_oid', composition '$composition'"); - my $resultset = $session->{resultsets}->{ $args->{SETNAME} }; + my $resultset = $self->{resultsets}->{ $args->{SETNAME} }; # The offset comes across 1-indexed. my $offset = $args->{OFFSET} - 1; - return unless $self->_check_fetch( $resultset, $args, $offset, 1 ); + return unless $self->check_fetch( $resultset, $args, $offset, 1 ); $args->{LAST} = 1 if ( $offset == $resultset->{hits} - 1 ); - my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} ); + my $record = $self->fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} ); return unless $record; + # Note that new_record_from_zebra is badly named and works also with Elasticsearch $record = C4::Search::new_record_from_zebra( $resultset->{database} eq 'biblios' ? 'biblioserver' : 'authorityserver', $record @@ -254,16 +172,84 @@ sub fetch_handler { } } - if ( $form_oid eq MARCXML_OID && $composition eq 'marcxml' ) { + if ( $form_oid eq $self->MARCXML_OID && $composition eq 'marcxml' ) { $args->{RECORD} = $record->as_xml_record(); - } elsif ( ( $form_oid eq USMARC_OID || $form_oid eq UNIMARC_OID ) && ( !$composition || $composition eq 'F' ) ) { + } elsif ( ( $form_oid eq $self->USMARC_OID || $form_oid eq $self->UNIMARC_OID ) && ( !$composition || $composition eq 'F' ) ) { $args->{RECORD} = $record->as_usmarc(); } else { - $self->_set_error( $args, ERR_SYNTAX_UNSUPPORTED, "Unsupported syntax/composition $form_oid/$composition" ); + $self->set_error( $args, $self->ERR_SYNTAX_UNSUPPORTED, "Unsupported syntax/composition $form_oid/$composition" ); return; } } +=head3 close_handler + +Callback that is called when a session is terminated + +=cut + +sub close_handler { + my ( $self, $args ) = @_; + + # Override in a child class to add functionality +} + +=head3 start_search + + my ($resultset, $hits) = $self->_start_search( $args, $self->{server}->{num_to_prefetch} ); + +A backend-specific method for starting a new search + +=cut + +sub start_search { + die('Abstract method'); +} + +=head3 check_fetch + + $self->check_fetch($resultset, $args, $offset, $num_records); + +Check that the fetch request parameters are within bounds of the result set. + +=cut + +sub check_fetch { + my ( $self, $resultset, $args, $offset, $num_records ) = @_; + + if ( !defined( $resultset ) ) { + $self->set_error( $args, ERR_NO_SUCH_RESULTSET, 'No such resultset' ); + return 0; + } + + if ( $offset < 0 || $offset + $num_records > $resultset->{hits} ) { + $self->set_error( $args, ERR_PRESENT_OUT_OF_RANGE, 'Present request out of range' ); + return 0; + } + + return 1; +} + +=head3 fetch_record + + my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} ); + +A backend-specific method for fetching a record + +=cut + +sub fetch_record { + die('Abstract method'); +} + +=head3 add_item_status + + $self->add_item_status( $field ); + +Add item status to the given field + +=cut + sub add_item_status { my ( $self, $field ) = @_; @@ -318,12 +304,60 @@ sub add_item_status { } } -sub close_handler { - my ( $self, $args ) = @_; - foreach my $resultset ( values %{ $self->{resultsets} } ) { - $resultset->{results}->destroy(); - } +=head3 log_debug + + $self->log_debug('Message'); + +Output a debug message + +=cut + +sub log_debug { + my ( $self, $msg ) = @_; + $self->{logger}->debug("[$self->{peer}] $msg"); +} + +=head3 log_info + + $self->log_info('Message'); + +Output an info message + +=cut + +sub log_info { + my ( $self, $msg ) = @_; + $self->{logger}->info("[$self->{peer}] $msg"); +} + +=head3 log_error + + $self->log_error('Message'); + +Output an error message + +=cut + +sub log_error { + my ( $self, $msg ) = @_; + $self->{logger}->error("[$self->{peer}] $msg"); +} + +=head3 set_error + + $self->set_error($args, $self->ERR_SEARCH_FAILED, 'Backend connection failed' ); + +Set and log an error code and diagnostic message to be returned to the client + +=cut + +sub set_error { + my ( $self, $args, $code, $msg ) = @_; + + ( $args->{ERR_CODE}, $args->{ERR_STR} ) = ( $code, $msg ); + + $self->log_error(" returning error $code: $msg"); } 1; diff --git a/Koha/Z3950Responder/ZebraSession.pm b/Koha/Z3950Responder/ZebraSession.pm new file mode 100644 index 0000000000..2665e5ef42 --- /dev/null +++ b/Koha/Z3950Responder/ZebraSession.pm @@ -0,0 +1,163 @@ +#!/usr/bin/perl + +package Koha::Z3950Responder::ZebraSession; + +# Copyright ByWater Solutions 2016 +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +use Modern::Perl; + +use base qw( Koha::Z3950Responder::Session ); + +use Koha::Logger; + +use ZOOM; + +=head1 NAME + +Koha::Z3950Responder::ZebraSession + +=head1 SYNOPSIS + +Zebra-specific session class that uses C as the base class. + +=head1 FUNCTIONS + +=head2 INSTANCE METHODS + +=head3 start_search + + my ($resultset, $hits) = $self->_start_search( $args, $self->{server}->{num_to_prefetch} ); + +Connect to Zebra and do the search + +=cut + +sub start_search { + my ( $self, $args, $num_to_prefetch, $in_retry ) = @_; + + my $database = $args->{DATABASES}->[0]; + my ( $connection, $results ); + + eval { + $connection = C4::Context->Zconn( + # We're depending on the caller to have done some validation. + $database eq 'biblios' ? 'biblioserver' : 'authorityserver', + 0 # No, no async, doesn't really help much for single-server searching + ); + + $results = $connection->search_pqf( $args->{QUERY} ); + + $self->log_debug(' retry successful') if ($in_retry); + }; + if ($@) { + die $@ if ( ref($@) ne 'ZOOM::Exception' ); + + if ( $@->diagset() eq 'ZOOM' && $@->code() == 10004 && !$in_retry ) { + $self->log_debug(' upstream server lost connection, retrying'); + return $self->_start_search( $args, $num_to_prefetch, 1 ); + } + + $self->_set_error_from_zoom( $args, $@ ); + $connection = undef; + } + + my $hits = $results ? $results->size() : -1; + my $resultset = { + database => $database, + connection => $connection, + results => $results, + query => $args->{QUERY}, + hits => $hits + }; + + return ( $resultset, $hits ); +} + +=head3 fetch_record + + my $record = $self->_fetch_record( $resultset, $args, $offset, $server->{num_to_prefetch} ); + +Fetch a record from Zebra. Caches records in session to avoid too many fetches. + +=cut + +sub fetch_record { + my ( $self, $resultset, $args, $index, $num_to_prefetch ) = @_; + + my $record; + + eval { + if ( !$resultset->{results}->record_immediate( $index ) ) { + my $start = $num_to_prefetch ? int( $index / $num_to_prefetch ) * $num_to_prefetch : $index; + + if ( $start + $num_to_prefetch >= $resultset->{results}->size() ) { + $num_to_prefetch = $resultset->{results}->size() - $start; + } + + $self->log_debug(" fetch uncached, fetching $num_to_prefetch records starting at $start"); + + $resultset->{results}->records( $start, $num_to_prefetch, 0 ); + } + + $record = $resultset->{results}->record_immediate( $index )->raw(); + }; + if ($@) { + die $@ if ( ref($@) ne 'ZOOM::Exception' ); + $self->_set_error_from_zoom( $args, $@ ); + return; + } else { + return $record; + } +} + +=head3 close_handler + +Callback that is called when a session is terminated + +=cut + +sub close_handler { + my ( $self, $args ) = @_; + + foreach my $resultset ( values %{ $self->{resultsets} } ) { + $resultset->{results}->destroy(); + } +} + +=head3 _set_error_from_zoom + + $self->_set_error_from_zoom( $args, $@ ); + +Log and set error code and diagnostic message from a ZOOM exception + +=cut + +sub _set_error_from_zoom { + my ( $self, $args, $exception ) = @_; + + $self->set_error( $args, $self->ERR_TEMPORARY_ERROR, 'Cannot connect to upstream server' ); + $self->log_error( + "Zebra upstream error: " . + $exception->message() . " (" . + $exception->code() . ") " . + ( $exception->addinfo() // '' ) . " " . + $exception->diagset() + ); +} + +1; diff --git a/etc/z3950/attribute_mappings.yaml b/etc/z3950/attribute_mappings.yaml new file mode 100644 index 0000000000..21d7e8f3f6 --- /dev/null +++ b/etc/z3950/attribute_mappings.yaml @@ -0,0 +1,46 @@ +--- +# Mappings from Z39.50 USE attributes to Koha search fields +authorities: + # BIB-1 use attributes to index fields + use: + 1: Personal-name + 2: Heading + 3: Heading + 9: LC-card-number + 12: Local-number + default: _all +biblios: + # BIB-1 use attributes to index fields + use: + 1: author + 2: author + 3: author + 4: title + 5: se + 7: isbn + 8: issn + 9: LC-card-number + 10: bnb-card-number + 11: bgf-number + 12: Local-number + 20: local-classification + 21: subject + 30: + - acqdate + - copydate + - pubdate + 31: pubdate + 32: acqdate + 52: control-number + 1003: author + 1007: identifier-standard + 1011: date-entered-on-file + 1012: date-time-last-modified + 1018: publisher + 1019: record-source + 1021: bib-level + 1028: barcode + 1031: itype + 1033: Host-Item-Number + 1045: control-number + default: _all diff --git a/etc/z3950/config.xml b/etc/z3950/config.xml new file mode 100644 index 0000000000..bb9c2a6eff --- /dev/null +++ b/etc/z3950/config.xml @@ -0,0 +1,12 @@ + + + pqf.properties + + + + + + + + + diff --git a/etc/z3950/pqf.properties b/etc/z3950/pqf.properties new file mode 100644 index 0000000000..55919ee202 --- /dev/null +++ b/etc/z3950/pqf.properties @@ -0,0 +1,162 @@ +# +# Propeties file to drive org.z3950.zing.cql.CQLNode's toPQF() +# back-end and the YAZ CQL-to-PQF converter. This specifies the +# interpretation of various CQL indexes, relations, etc. in terms +# of Type-1 query attributes. +# +# This configuration file generates queries using BIB-1 attributes. +# See http://www.loc.gov/z3950/agency/zing/cql/dc-indexes.html +# for the Maintenance Agency's work-in-progress mapping of Dublin Core +# indexes to Attribute Architecture (util, XD and BIB-2) +# attributes. + +# Identifiers for prefixes used in this file. (index.*) +set.cql = info:srw/cql-context-set/1/cql-v1.1 +set.rec = info:srw/cql-context-set/2/rec-1.0 +set.dc = info:srw/cql-context-set/1/dc-v1.1 +set.bath = http://zing.z3950.org/cql/bath/2.0/ + +# default set (in query) +set = info:srw/cql-context-set/1/dc-v1.1 + +# The default access point and result-set references +index.cql.serverChoice = 1=1016 + # srw.serverChoice is deprecated in favour of cql.serverChoice + # BIB-1 "any" + +index.rec.id = 1=12 +index.dc.identifier = 1=1007 +index.dc.title = 1=4 +index.dc.subject = 1=21 +index.dc.creator = 1=1003 +index.dc.author = 1=1003 +index.dc.itemtype = 1=1031 +index.dc.barcode = 1=1028 +index.dc.branch = 1=1033 +index.dc.isbn = 1=7 +index.dc.issn = 1=8 +index.dc.any = 1=1016 +index.dc.note = 1=63 + +# personal name experimental +index.dc.pname = 1=1 + ### Unofficial synonym for "creator" +index.dc.editor = 1=1020 +index.dc.publisher = 1=1018 +index.dc.description = 1=62 + # "abstract" +index.dc.date = 1=30 +index.dc.resourceType = 1=1031 + # guesswork: "Material-type" +index.dc.format = 1=1034 + # guesswork: "Content-type" +index.dc.resourceIdentifier = 1=12 + # "Local number" +#index.dc.source = 1=1019 + # "Record-source" +index.dc.language = 1=54 + # "Code--language" + +index.dc.Place-publication = 1=59 + # "Place-publication" + +#index.dc.relation = 1=? + ### No idea how to represent this +#index.dc.coverage = 1=? + ### No idea how to represent this +#index.dc.rights = 1=? + ### No idea how to represent this + +# Relation attributes are selected according to the CQL relation by +# looking up the "relation." property: +# +relation.< = 2=1 +relation.le = 2=2 +relation.eq = 2=3 +relation.exact = 2=3 +relation.ge = 2=4 +relation.> = 2=5 +relation.<> = 2=6 + +### These two are not really right: +relation.all = 2=3 +relation.any = 2=3 + +# BIB-1 doesn't have a server choice relation, so we just make the +# choice here, and use equality (which is clearly correct). +relation.scr = 2=3 + +# Relation modifiers. +# +relationModifier.relevant = 2=102 +relationModifier.fuzzy = 5=103 + ### 100 is "phonetic", which is not quite the same thing +relationModifier.stem = 2=101 +relationModifier.phonetic = 2=100 + +# Position attributes may be specified for anchored terms (those +# beginning with "^", which is stripped) and unanchored (those not +# beginning with "^"). This may change when we get a BIB-1 truncation +# attribute that says "do what CQL does". +# +position.first = 3=1 6=1 + # "first in field" +position.any = 3=3 6=1 + # "any position in field" +position.last = 3=4 6=1 + # not a standard BIB-1 attribute +position.firstAndLast = 3=3 6=3 + # search term is anchored to be complete field + +# Structure attributes may be specified for individual relations; a +# default structure attribute my be specified by the pseudo-relation +# "*", to be used whenever a relation not listed here occurs. +# +structure.exact = 4=108 + # string +structure.all = 4=2 +structure.any = 4=2 +structure.* = 4=1 + # phrase + +# Truncation attributes used to implement CQL wildcard patterns. The +# simpler forms, left, right- and both-truncation will be used for the +# simplest patterns, so that we produce PQF queries that conform more +# closely to the Bath Profile. However, when a more complex pattern +# such as "foo*bar" is used, we fall back on Z39.58-style masking. +# +truncation.right = 5=1 +truncation.left = 5=2 +truncation.both = 5=3 +truncation.none = 5=100 +truncation.z3958 = 5=104 + +# Finally, any additional attributes that should always be included +# with each term can be specified in the "always" property. +# +always = 6=1 +# 6=1: completeness = incomplete subfield + + +# Bath Profile support, added Thu Dec 18 13:06:20 GMT 2003 +# See the Bath Profile for SRW at +# http://zing.z3950.org/cql/bath.html +# including the Bath Context Set defined within that document. +# +# In this file, we only map index-names to BIB-1 use attributes, doing +# so in accordance with the specifications of the Z39.50 Bath Profile, +# and leaving the relations, wildcards, etc. to fend for themselves. + +index.bath.keyTitle = 1=33 +index.bath.possessingInstitution = 1=1044 +index.bath.name = 1=1002 +index.bath.personalName = 1=1 +index.bath.corporateName = 1=2 +index.bath.conferenceName = 1=3 +index.bath.uniformTitle = 1=6 +index.bath.isbn = 1=7 +index.bath.issn = 1=8 +index.bath.geographicName = 1=58 +index.bath.notes = 1=63 +index.bath.topicalSubject = 1=1079 +index.bath.genreForm = 1=1075 diff --git a/misc/z3950_responder.pl b/misc/z3950_responder.pl index 729934338d..e6fa623a62 100755 --- a/misc/z3950_responder.pl +++ b/misc/z3950_responder.pl @@ -20,10 +20,11 @@ use Modern::Perl; use Carp; +use File::Basename; use Getopt::Long qw(:config no_ignore_case); use Pod::Usage; -use C4::Context; +use Koha::Config; use Koha::Z3950Responder; =head1 SYNOPSIS @@ -31,7 +32,7 @@ use Koha::Z3950Responder; z3950_responder.pl [-h|--help] [--man] [-a ] [-v ] [-l ] [-u ] [-c ] [-t ] [-k ] [-d ] [-p ] [-C certfile] [-zKiDST1] [-m ] [-w ] [--debug] - [--add-item-status=SUBFIELD] [--prefetch=NUM_RECORDS] + [--add-item-status=SUBFIELD] [--prefetch=NUM_RECORDS] [--config-dir=] [... ] =head1 OPTIONS @@ -51,7 +52,7 @@ Displays manual page and exits. =item B<--debug> -Turns on debug logging to the screen, and turns on single-process mode. +Turns on debug logging to the screen and the single-process mode. =item B<--add-item-status=SUBFIELD> @@ -64,7 +65,12 @@ status string. =item B<--prefetch=NUM_RECORDS> -Number of records to prefetch from Zebra. Defaults to 20. +Number of records to prefetch. Defaults to 20. + +=item B<--config-dir=directory> + +Directory where to find configuration files required for proper operation. Defaults to z3950 under +the Koha config directory. =back @@ -101,6 +107,8 @@ my $debug = 0; my $help; my $man; my $prefetch = 20; +my $config_dir = ''; + my @yaz_options; sub add_yaz_option { @@ -122,6 +130,7 @@ GetOptions( '--add-item-status=s' => \$add_item_status_subfield, '--add-status-multi-subfield' => \$add_status_multi_subfield, '--prefetch=i' => \$prefetch, + '--config-dir=s' => \$config_dir, # Pass through YAZ options. 'a=s' => \&add_yaz_option, 'v=s' => \&add_yaz_option, @@ -152,15 +161,22 @@ if (!@ARGV || $ARGV[-1] =~ /^-/) { push(@ARGV, '@:2100'); } -# Create and start the server. +# If config_dir is not defined, default to z3950 under the Koha config directory +if (!$config_dir) { + (undef, $config_dir) = fileparse(Koha::Config->guess_koha_conf); + $config_dir .= 'z3950/'; +} else { + $config_dir .= '/' if ($config_dir !~ /\/$/); +} -die "This tool only works with Zebra" if C4::Context->preference('SearchEngine') ne 'Zebra'; +# Create and start the server. my $z = Koha::Z3950Responder->new( { add_item_status_subfield => $add_item_status_subfield, add_status_multi_subfield => $add_status_multi_subfield, debug => $debug, num_to_prefetch => $prefetch, + config_dir => $config_dir, yaz_options => [ @yaz_options, @ARGV ], } ); -- 2.39.5