From 9356c050b2fa2453912581f15938e4bc16b2cc32 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Wed, 15 Feb 2017 15:20:27 +0200 Subject: [PATCH] Bug 15108: OAI-PMH provider improvements - Fixed date handling to use UTC as specs require. - Added support for second precision in time stamps. - Added support for marc21 metadata prefix as recommended in the guidelines (synonym for marcxml). - Improved performance of database queries especially for large collections. - Unified functionality of ListRecords and ListIdentifiers to a common base class. - If items are included in the records, their timestamps are taken into account everywhere so that whichever is the most recent (timestamp of biblioitem or any of its items) is considered the record's timestamp. - Fixed OAI.xslt to show correct record range. - Incorporated extended tests from Bug 17493 and their tweaks from Bug 15108. Signed-off-by: Josef Moravec Signed-off-by: Olli-Antti Kivilahti Signed-off-by: Tomas Cohen Arazi Signed-off-by: Kyle M Hall --- Koha/OAI/Server/GetRecord.pm | 88 +++-- Koha/OAI/Server/Identify.pm | 7 +- Koha/OAI/Server/ListBase.pm | 182 ++++++++++ Koha/OAI/Server/ListIdentifiers.pm | 58 +--- Koha/OAI/Server/ListMetadataFormats.pm | 7 +- Koha/OAI/Server/ListRecords.pm | 75 +--- Koha/OAI/Server/Record.pm | 2 +- Koha/OAI/Server/Repository.pm | 27 +- Koha/OAI/Server/ResumptionToken.pm | 15 +- ...ug15108-OAI-PMH_provider_improvements.perl | 11 + installer/data/mysql/kohastructure.sql | 12 +- koha-tmpl/opac-tmpl/xslt/OAI.xslt | 4 +- t/db_dependent/OAI/Server.t | 327 +++++++++++++++--- 13 files changed, 592 insertions(+), 223 deletions(-) create mode 100644 Koha/OAI/Server/ListBase.pm create mode 100644 installer/data/mysql/atomicupdate/Bug15108-OAI-PMH_provider_improvements.perl diff --git a/Koha/OAI/Server/GetRecord.pm b/Koha/OAI/Server/GetRecord.pm index bf0c3aec3a..d4870e3887 100644 --- a/Koha/OAI/Server/GetRecord.pm +++ b/Koha/OAI/Server/GetRecord.pm @@ -32,20 +32,62 @@ sub new { my $self = HTTP::OAI::GetRecord->new(%args); + my $prefix = $repository->{koha_identifier} . ':'; + my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/; + my $items_included = $repository->items_included( $args{metadataPrefix} ); my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare(" + my $sql = " SELECT timestamp FROM biblioitems - WHERE biblionumber=? " ); - my $prefix = $repository->{koha_identifier} . ':'; - my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/; - $sth->execute( $biblionumber ); + WHERE biblionumber=? + "; + my @bind_params = ($biblionumber); + if ( $items_included ) { + # Take latest timestamp of biblio and any items + $sql .= " + UNION + SELECT timestamp from deleteditems + WHERE biblionumber=? + UNION + SELECT timestamp from items + WHERE biblionumber=? + "; + push @bind_params, $biblionumber; + push @bind_params, $biblionumber; + $sql = " + SELECT max(timestamp) + FROM ($sql) bib + "; + } + + my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); + $sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr ); my ($timestamp, $deleted); - unless ( ($timestamp) = $sth->fetchrow ) { - unless ( ($timestamp) = $dbh->selectrow_array(q/ + unless ( ($timestamp = $sth->fetchrow) ) { + $sql = " SELECT timestamp FROM deletedbiblio - WHERE biblionumber=? /, undef, $biblionumber )) + WHERE biblionumber=? + "; + @bind_params = ($biblionumber); + + if ( $items_included ) { + # Take latest timestamp among biblio and items + $sql .= " + UNION + SELECT timestamp from deleteditems + WHERE biblionumber=? + "; + push @bind_params, $biblionumber; + $sql = " + SELECT max(timestamp) + FROM ($sql) bib + "; + } + + $sth = $dbh->prepare($sql) || die('Could not prepare statement: ' . $dbh->errstr); + $sth->execute( @bind_params ) || die('Could not execute statement: ' . $sth->errstr); + unless ( ($timestamp = $sth->fetchrow) ) { return HTTP::OAI::Response->new( requestURL => $repository->self_url(), @@ -55,28 +97,30 @@ sub new { ) ], ); } - else { - $deleted = 1; - } + $deleted = 1; } - # We fetch it using this method, rather than the database directly, - # so it'll include the item data - my $marcxml; - $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix}) - unless $deleted; my $oai_sets = GetOAISetsBiblio($biblionumber); my @setSpecs; foreach (@$oai_sets) { push @setSpecs, $_->{spec}; } - #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) ); - $self->record( - $deleted - ? Koha::OAI::Server::DeletedRecord->new($timestamp, \@setSpecs, %args) - : Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args) - ); + if ($deleted) { + $self->record( + Koha::OAI::Server::DeletedRecord->new($timestamp, \@setSpecs, %args) + ); + } else { + # We fetch it using this method, rather than the database directly, + # so it'll include the item data + my $marcxml; + $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix}) + unless $deleted; + + $self->record( + Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args) + ); + } return $self; } diff --git a/Koha/OAI/Server/Identify.pm b/Koha/OAI/Server/Identify.pm index 79f6b80e1f..dc1a53086a 100644 --- a/Koha/OAI/Server/Identify.pm +++ b/Koha/OAI/Server/Identify.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016-2017 # # This file is part of Koha. # @@ -33,8 +34,8 @@ sub new { repositoryName => C4::Context->preference("LibraryName"), adminEmail => C4::Context->preference("KohaAdminEmailAddress"), MaxCount => C4::Context->preference("OAI-PMH:MaxCount"), - granularity => 'YYYY-MM-DD', - earliestDatestamp => _get_earliest_datestamp() || '0001-01-01', + granularity => 'YYYY-MM-DDThh:mm:ssZ', + earliestDatestamp => _get_earliest_datestamp() || '0001-01-01T00:00:00Z', deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no', ); @@ -53,7 +54,7 @@ sub new { # will be returned and we will report the fallback 0001-01-01. sub _get_earliest_datestamp { my $dbh = C4::Context->dbh; - my ( $earliest ) = $dbh->selectrow_array("SELECT DATE(MIN(timestamp)) AS earliest FROM biblio" ); + my ( $earliest ) = $dbh->selectrow_array("SELECT MIN(timestamp) AS earliest FROM biblio" ); return $earliest } diff --git a/Koha/OAI/Server/ListBase.pm b/Koha/OAI/Server/ListBase.pm new file mode 100644 index 0000000000..0c7c56a13d --- /dev/null +++ b/Koha/OAI/Server/ListBase.pm @@ -0,0 +1,182 @@ +package Koha::OAI::Server::ListBase; + +# Copyright The National Library of Finland, University of Helsinki 2016-2017 +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . + +=head1 NAME + +Koha::OAI::Server::ListBase - OAI ListIdentifiers/ListRecords shared functionality + +=head1 DESCRIPTION + +Koha::OAI::Server::ListBase contains OAI-PMH functions shared by ListIdentifiers and ListRecords. + +=cut + +use Modern::Perl; +use C4::Biblio; +use HTTP::OAI; +use Koha::OAI::Server::ResumptionToken; +use Koha::OAI::Server::Record; +use Koha::OAI::Server::DeletedRecord; +use C4::OAI::Sets; +use MARC::File::XML; + +sub GetRecords { + my ($class, $self, $repository, $metadata, %args) = @_; + + my $token = new Koha::OAI::Server::ResumptionToken( %args ); + my $dbh = C4::Context->dbh; + my $set; + if ( defined $token->{'set'} ) { + $set = GetOAISetBySpec($token->{'set'}); + } + my $offset = $token->{offset}; + my $deleted = defined $token->{deleted} ? $token->{deleted} : 0; + my $deleted_count = defined $token->{deleted_count} ? $token->{deleted_count} : 0; + my $max = $repository->{koha_max_count}; + my $count = 0; + my $format = $args{metadataPrefix} || $token->{metadata_prefix}; + my $include_items = $repository->items_included( $format ); + + # Since creating a union of normal and deleted record tables would be a heavy + # operation in a large database, build results in two stages: + # first deleted records ($deleted == 1), then normal records ($deleted == 0) + STAGELOOP: + for ( ; $deleted >= 0; $deleted-- ) { + my $table = $deleted ? 'deletedbiblioitems' : 'biblioitems'; + my $sql = " + SELECT biblionumber + FROM $table + WHERE (timestamp >= ? AND timestamp <= ?) + "; + my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'}); + + if ($include_items) { + $sql .= " + OR biblionumber IN (SELECT biblionumber from deleteditems WHERE timestamp >= ? AND timestamp <= ?) + "; + push @bind_params, ($token->{'from_arg'}, $token->{'until_arg'}); + if (!$deleted) { + $sql .= " + OR biblionumber IN (SELECT biblionumber from items WHERE timestamp >= ? AND timestamp <= ?) + "; + push @bind_params, ($token->{'from_arg'}, $token->{'until_arg'}); + } + } + + $sql .= " + ORDER BY biblionumber + "; + + # Use a subquery for sets since it allows us to use an index in + # biblioitems table and is quite a bit faster than a join. + if (defined $set) { + $sql = " + SELECT bi.* FROM ($sql) bi + WHERE bi.biblionumber in (SELECT osb.biblionumber FROM oai_sets_biblios osb WHERE osb.set_id = ?) + "; + push @bind_params, $set->{'id'}; + } + + $sql .= " + LIMIT " . ($max + 1) . " + OFFSET " . ($offset - $deleted_count); + + my $sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); + + if ( $deleted ) { + $sql = " + SELECT MAX(timestamp) + FROM ( + SELECT timestamp FROM deletedbiblioitems WHERE biblionumber = ? + UNION + SELECT timestamp FROM deleteditems WHERE biblionumber = ? + ) bis + "; + } else { + $sql = " + SELECT MAX(timestamp) + FROM ( + SELECT timestamp FROM biblioitems WHERE biblionumber = ? + UNION + SELECT timestamp FROM deleteditems WHERE biblionumber = ? + UNION + SELECT timestamp FROM items WHERE biblionumber = ? + ) bi + "; + } + my $record_sth = $dbh->prepare( $sql ) || die( 'Could not prepare statement: ' . $dbh->errstr ); + + $sth->execute( @bind_params ) || die( 'Could not execute statement: ' . $sth->errstr ); + while ( my ($biblionumber) = $sth->fetchrow ) { + $count++; + if ( $count > $max ) { + $self->resumptionToken( + new Koha::OAI::Server::ResumptionToken( + metadataPrefix => $token->{metadata_prefix}, + from => $token->{from}, + until => $token->{until}, + offset => $token->{offset} + $max, + set => $token->{set}, + deleted => $deleted, + deleted_count => $deleted_count + ) + ); + last STAGELOOP; + } + my @params = $deleted ? ( $biblionumber, $biblionumber ) : ( $biblionumber, $biblionumber, $biblionumber ); + $record_sth->execute( @params ) || die( 'Could not execute statement: ' . $sth->errstr ); + + my ($timestamp) = $record_sth->fetchrow; + + my $oai_sets = GetOAISetsBiblio($biblionumber); + my @setSpecs; + foreach ( @$oai_sets ) { + push @setSpecs, $_->{spec}; + } + if ( $metadata ) { + my $marcxml = !$deleted ? $repository->get_biblio_marcxml($biblionumber, $format) : undef; + if ( $marcxml ) { + $self->record( Koha::OAI::Server::Record->new( + $repository, $marcxml, $timestamp, \@setSpecs, + identifier => $repository->{ koha_identifier } . ':' . $biblionumber, + metadataPrefix => $token->{metadata_prefix} + ) ); + } else { + $self->record( Koha::OAI::Server::DeletedRecord->new( + $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber + ) ); + } + } else { + $timestamp =~ s/ /T/; + $timestamp .= 'Z'; + $self->identifier( new HTTP::OAI::Header( + identifier => $repository->{ koha_identifier} . ':' . $biblionumber, + datestamp => $timestamp, + status => $deleted ? 'deleted' : undef + ) ); + } + } + # Store offset and deleted record count + $offset += $count; + $deleted_count = $offset if ($deleted); + } + return $count; +} + +1; diff --git a/Koha/OAI/Server/ListIdentifiers.pm b/Koha/OAI/Server/ListIdentifiers.pm index 8b6e830946..8793f66928 100644 --- a/Koha/OAI/Server/ListIdentifiers.pm +++ b/Koha/OAI/Server/ListIdentifiers.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016 # # This file is part of Koha. # @@ -20,68 +21,15 @@ package Koha::OAI::Server::ListIdentifiers; use Modern::Perl; use HTTP::OAI; -use C4::OAI::Sets; - -use base ("HTTP::OAI::ListIdentifiers"); +use base qw(HTTP::OAI::ListIdentifiers Koha::OAI::Server::ListBase); sub new { my ($class, $repository, %args) = @_; my $self = HTTP::OAI::ListIdentifiers->new(%args); - my $token = new Koha::OAI::Server::ResumptionToken( %args ); - my $dbh = C4::Context->dbh; - my $set; - if(defined $token->{'set'}) { - $set = GetOAISetBySpec($token->{'set'}); - } - my $max = $repository->{koha_max_count}; - my $sql = " - (SELECT biblioitems.biblionumber, biblioitems.timestamp - FROM biblioitems - "; - $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set; - $sql .= " WHERE timestamp >= ? AND timestamp <= ? "; - $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set; - $sql .= ") UNION - (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio"; - $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set; - $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? "; - $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set; - - $sql .= ") ORDER BY biblionumber - LIMIT " . ($max+1) . " - OFFSET $token->{offset} - "; - my $sth = $dbh->prepare( $sql ); - my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'}); - push @bind_params, $set->{'id'} if defined $set; - push @bind_params, ($token->{'from'}, $token->{'until'}); - push @bind_params, $set->{'id'} if defined $set; - $sth->execute( @bind_params ); - - my $count = 0; - while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) { - $count++; - if ( $count > $max ) { - $self->resumptionToken( - new Koha::OAI::Server::ResumptionToken( - metadataPrefix => $token->{metadata_prefix}, - from => $token->{from}, - until => $token->{until}, - offset => $token->{offset} + $max, - set => $token->{set} - ) - ); - last; - } - $timestamp =~ s/ /T/, $timestamp .= 'Z'; - $self->identifier( new HTTP::OAI::Header( - identifier => $repository->{ koha_identifier} . ':' . $biblionumber, - datestamp => $timestamp, - ) ); - } + my $count = $class->GetRecords($self, $repository, 0, %args); # Return error if no results unless ($count) { diff --git a/Koha/OAI/Server/ListMetadataFormats.pm b/Koha/OAI/Server/ListMetadataFormats.pm index 47463a7555..f6849cdde0 100644 --- a/Koha/OAI/Server/ListMetadataFormats.pm +++ b/Koha/OAI/Server/ListMetadataFormats.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016 # # This file is part of Koha. # @@ -23,7 +24,6 @@ use HTTP::OAI; use base ("HTTP::OAI::ListMetadataFormats"); - sub new { my ($class, $repository) = @_; @@ -44,6 +44,11 @@ sub new { schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/' ) ); + $self->metadataFormat( HTTP::OAI::MetadataFormat->new( + metadataPrefix => 'marc21', + schema => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', + metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim' + ) ); $self->metadataFormat( HTTP::OAI::MetadataFormat->new( metadataPrefix => 'marcxml', schema => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', diff --git a/Koha/OAI/Server/ListRecords.pm b/Koha/OAI/Server/ListRecords.pm index d395eedd43..ff1c786395 100644 --- a/Koha/OAI/Server/ListRecords.pm +++ b/Koha/OAI/Server/ListRecords.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016 # # This file is part of Koha. # @@ -19,86 +20,16 @@ package Koha::OAI::Server::ListRecords; use Modern::Perl; -use C4::Biblio; use HTTP::OAI; -use Koha::OAI::Server::ResumptionToken; -use Koha::OAI::Server::Record; -use Koha::OAI::Server::DeletedRecord; -use C4::OAI::Sets; -use MARC::File::XML; - -use base ("HTTP::OAI::ListRecords"); +use base qw(HTTP::OAI::ListRecords Koha::OAI::Server::ListBase); sub new { my ($class, $repository, %args) = @_; my $self = HTTP::OAI::ListRecords->new(%args); - my $token = new Koha::OAI::Server::ResumptionToken( %args ); - my $dbh = C4::Context->dbh; - my $set; - if(defined $token->{'set'}) { - $set = GetOAISetBySpec($token->{'set'}); - } - my $max = $repository->{koha_max_count}; - my $sql = " - (SELECT biblioitems.biblionumber, biblioitems.timestamp - FROM biblioitems - "; - $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set; - $sql .= " WHERE timestamp >= ? AND timestamp <= ? "; - $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set; - $sql .= ") UNION - (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio"; - $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set; - $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? "; - $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set; - - $sql .= ") ORDER BY biblionumber - LIMIT " . ($max + 1) . " - OFFSET $token->{offset} - "; - my $sth = $dbh->prepare( $sql ); - my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'}); - push @bind_params, $set->{'id'} if defined $set; - push @bind_params, ($token->{'from'}, $token->{'until'}); - push @bind_params, $set->{'id'} if defined $set; - $sth->execute( @bind_params ); - - my $count = 0; - my $format = $args{metadataPrefix} || $token->{metadata_prefix}; - while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) { - $count++; - if ( $count > $max ) { - $self->resumptionToken( - new Koha::OAI::Server::ResumptionToken( - metadataPrefix => $token->{metadata_prefix}, - from => $token->{from}, - until => $token->{until}, - offset => $token->{offset} + $max, - set => $token->{set} - ) - ); - last; - } - my $marcxml = $repository->get_biblio_marcxml($biblionumber, $format); - my $oai_sets = GetOAISetsBiblio($biblionumber); - my @setSpecs; - foreach (@$oai_sets) { - push @setSpecs, $_->{spec}; - } - if ($marcxml) { - $self->record( Koha::OAI::Server::Record->new( - $repository, $marcxml, $timestamp, \@setSpecs, - identifier => $repository->{ koha_identifier } . ':' . $biblionumber, - metadataPrefix => $token->{metadata_prefix} - ) ); - } else { - $self->record( Koha::OAI::Server::DeletedRecord->new( - $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) ); - } - } + my $count = $class->GetRecords($self, $repository, 1, %args); # Return error if no results unless ($count) { diff --git a/Koha/OAI/Server/Record.pm b/Koha/OAI/Server/Record.pm index 542dfc4b30..cf86495d20 100644 --- a/Koha/OAI/Server/Record.pm +++ b/Koha/OAI/Server/Record.pm @@ -43,7 +43,7 @@ sub new { my $parser = XML::LibXML->new(); my $record_dom = $parser->parse_string( $marcxml ); my $format = $args{metadataPrefix}; - if ( $format ne 'marcxml' ) { + if ( $format ne 'marc21' && $format ne 'marcxml' ) { my %args = ( OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'" ); diff --git a/Koha/OAI/Server/Repository.pm b/Koha/OAI/Server/Repository.pm index f4f30f6ece..50891c0db5 100644 --- a/Koha/OAI/Server/Repository.pm +++ b/Koha/OAI/Server/Repository.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016 # # This file is part of Koha. # @@ -77,6 +78,11 @@ mode. A configuration file koha-oai.conf can look like that: metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd xsl_file: /usr/local/koha/xslt/vs.xsl + marc21: + metadataPrefix: marc21 + metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim + schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd + include_items: 1 marcxml: metadataPrefix: marxml metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim @@ -88,7 +94,7 @@ mode. A configuration file koha-oai.conf can look like that: schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl -Note de 'include_items' parameter which is the only mean to return item-level info. +Note the 'include_items' parameter which is the only mean to return item-level info. =cut @@ -99,7 +105,7 @@ sub new { $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID"); $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount"); - $self->{ koha_metadata_format } = ['oai_dc', 'marcxml']; + $self->{ koha_metadata_format } = ['oai_dc', 'marc21', 'marcxml']; $self->{ koha_stylesheet } = { }; # Build when needed # Load configuration file if defined in OAI-PMH:ConfFile syspref @@ -109,10 +115,14 @@ sub new { $self->{ koha_metadata_format } = \@formats; } + # OAI-PMH handles dates in UTC, so do that on the database level to avoid need for + # any conversions + C4::Context->dbh->prepare("SET time_zone='+00:00'")->execute(); + # Check for grammatical errors in the request my @errs = validate_request( CGI::Vars() ); - # Is metadataPrefix supported by the respository? + # Is metadataPrefix supported by the repository? my $mdp = param('metadataPrefix') || ''; if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) { push @errs, new HTTP::OAI::Error( @@ -166,6 +176,7 @@ sub stylesheet { '/prog/en/xslt/' . C4::Context->preference('marcflavour') . 'slim2OAIDC.xsl' ); + $xsl_file || die( "No stylesheet found for $format" ); my $parser = XML::LibXML->new(); my $xslt = XML::LibXSLT->new(); my $style_doc = $parser->parse_file( $xsl_file ); @@ -176,4 +187,14 @@ sub stylesheet { return $stylesheet; } + +sub items_included { + my ( $self, $format ) = @_; + + if ( my $conf = $self->{ conf } ) { + return $conf->{ format }->{ $format }->{ include_items }; + } + return 0; +} + 1; diff --git a/Koha/OAI/Server/ResumptionToken.pm b/Koha/OAI/Server/ResumptionToken.pm index 9798059218..ee97771ca5 100644 --- a/Koha/OAI/Server/ResumptionToken.pm +++ b/Koha/OAI/Server/ResumptionToken.pm @@ -1,5 +1,6 @@ # Copyright Tamil s.a.r.l. 2008-2015 # Copyright Biblibre 2008-2015 +# Copyright The National Library of Finland, University of Helsinki 2016-2017 # # This file is part of Koha. # @@ -31,16 +32,16 @@ use base ("HTTP::OAI::ResumptionToken"); # - from # - until # - offset - +# - deleted sub new { my ($class, %args) = @_; my $self = $class->SUPER::new(%args); - my ($metadata_prefix, $offset, $from, $until, $set); + my ($metadata_prefix, $offset, $from, $until, $set, $deleted, $deleted_count); if ( $args{ resumptionToken } ) { - ($metadata_prefix, $offset, $from, $until, $set) + ($metadata_prefix, $offset, $from, $until, $set, $deleted, $deleted_count) = split( '/', $args{resumptionToken} ); } else { @@ -55,7 +56,9 @@ sub new { $from .= 'T00:00:00Z' if length($from) == 10; $until .= 'T23:59:59Z' if length($until) == 10; $offset = $args{ offset } || 0; - $set = $args{set} || ''; + $set = $args{ set } || ''; + $deleted = defined $args{ deleted } ? $args{ deleted } : 1; + $deleted_count = defined $args{ deleted_count } ? $args{ deleted_count } : 0; } $self->{ metadata_prefix } = $metadata_prefix; @@ -65,9 +68,11 @@ sub new { $self->{ set } = $set; $self->{ from_arg } = _strip_UTC_designators($from); $self->{ until_arg } = _strip_UTC_designators($until); + $self->{ deleted } = $deleted; + $self->{ deleted_count } = $deleted_count; $self->resumptionToken( - join( '/', $metadata_prefix, $offset, $from, $until, $set ) ); + join( '/', $metadata_prefix, $offset, $from, $until, $set, $deleted, $deleted_count ) ); $self->cursor( $offset ); return $self; diff --git a/installer/data/mysql/atomicupdate/Bug15108-OAI-PMH_provider_improvements.perl b/installer/data/mysql/atomicupdate/Bug15108-OAI-PMH_provider_improvements.perl new file mode 100644 index 0000000000..9a252cf5ce --- /dev/null +++ b/installer/data/mysql/atomicupdate/Bug15108-OAI-PMH_provider_improvements.perl @@ -0,0 +1,11 @@ +$DBversion = 'XXX'; # will be replaced by the RM +if( CheckVersion( $DBversion ) ) { + $dbh->do("ALTER TABLE biblioitems ADD KEY `timestamp` (`timestamp`);"); + $dbh->do("ALTER TABLE deletedbiblioitems ADD KEY `timestamp` (`timestamp`);"); + $dbh->do("ALTER TABLE items ADD KEY `timestamp` (`timestamp`);"); + $dbh->do("ALTER TABLE deleteditems ADD KEY `timestamp` (`timestamp`);"); + + # Always end with this (adjust the bug info) + SetVersion( $DBversion ); + print "Upgrade to $DBversion done (Bug 15108 - OAI-PMH provider improvements)\n"; +} diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql index 94ef125881..53fdd863b8 100644 --- a/installer/data/mysql/kohastructure.sql +++ b/installer/data/mysql/kohastructure.sql @@ -200,6 +200,7 @@ CREATE TABLE `biblioitems` ( -- information related to bibliographic records in KEY `isbn` (`isbn`(255)), KEY `issn` (`issn`(255)), KEY `publishercode` (`publishercode`), + KEY `timestamp` (`timestamp`), CONSTRAINT `biblioitems_ibfk_1` FOREIGN KEY (`biblionumber`) REFERENCES `biblio` (`biblionumber`) ON DELETE CASCADE ON UPDATE CASCADE ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; @@ -553,7 +554,8 @@ CREATE TABLE `deletedbiblioitems` ( -- information about bibliographic records t KEY `bibnoidx` (`biblionumber`), KEY `itemtype_idx` (`itemtype`), KEY `isbn` (`isbn`(255)), - KEY `publishercode` (`publishercode`) + KEY `publishercode` (`publishercode`), + KEY `timestamp` (`timestamp`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; -- @@ -695,7 +697,8 @@ CREATE TABLE `deleteditems` ( KEY `delitembibnoidx` (`biblionumber`), KEY `delhomebranch` (`homebranch`), KEY `delholdingbranch` (`holdingbranch`), - KEY `itype_idx` (`itype`) + KEY `itype_idx` (`itype`), + KEY `timestamp` (`timestamp`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; -- @@ -901,7 +904,7 @@ CREATE TABLE `refund_lost_item_fee_rules` ( -- refund lost item fee rules tbale -- DROP TABLE IF EXISTS `items`; -CREATE TABLE `items` ( -- holdings/item information +CREATE TABLE `items` ( -- holdings/item information `itemnumber` int(11) NOT NULL auto_increment, -- primary key and unique identifier added by Koha `biblionumber` int(11) NOT NULL default 0, -- foreign key from biblio table used to link this item to the right bib record `biblioitemnumber` int(11) NOT NULL default 0, -- foreign key from the biblioitems table to link to item to additional information @@ -957,6 +960,7 @@ CREATE TABLE `items` ( -- holdings/item information KEY `items_location` (`location`), KEY `items_ccode` (`ccode`), KEY `itype_idx` (`itype`), + KEY `timestamp` (`timestamp`), CONSTRAINT `items_ibfk_1` FOREIGN KEY (`biblioitemnumber`) REFERENCES `biblioitems` (`biblioitemnumber`) ON DELETE CASCADE ON UPDATE CASCADE, CONSTRAINT `items_ibfk_2` FOREIGN KEY (`homebranch`) REFERENCES `branches` (`branchcode`) ON UPDATE CASCADE, CONSTRAINT `items_ibfk_3` FOREIGN KEY (`holdingbranch`) REFERENCES `branches` (`branchcode`) ON UPDATE CASCADE, @@ -2249,7 +2253,7 @@ CREATE TABLE `userflags` ( -- DROP TABLE IF EXISTS `virtualshelves`; -CREATE TABLE `virtualshelves` ( -- information about lists (or virtual shelves) +CREATE TABLE `virtualshelves` ( -- information about lists (or virtual shelves) `shelfnumber` int(11) NOT NULL auto_increment, -- unique identifier assigned by Koha `shelfname` varchar(255) default NULL, -- name of the list `owner` int default NULL, -- foreign key linking to the borrowers table (using borrowernumber) for the creator of this list (changed from varchar(80) to int) diff --git a/koha-tmpl/opac-tmpl/xslt/OAI.xslt b/koha-tmpl/opac-tmpl/xslt/OAI.xslt index d9facccbe3..8a9f490282 100644 --- a/koha-tmpl/opac-tmpl/xslt/OAI.xslt +++ b/koha-tmpl/opac-tmpl/xslt/OAI.xslt @@ -531,9 +531,9 @@ - + - - + diff --git a/t/db_dependent/OAI/Server.t b/t/db_dependent/OAI/Server.t index 05c2d87a10..4e1215cd99 100644 --- a/t/db_dependent/OAI/Server.t +++ b/t/db_dependent/OAI/Server.t @@ -1,6 +1,6 @@ #!/usr/bin/perl -# Copyright Tamil s.a.r.l. 2015 +# Copyright Tamil s.a.r.l. 2016 # # This file is part of Koha. # @@ -17,23 +17,27 @@ # You should have received a copy of the GNU General Public License # along with Koha; if not, see . - use Modern::Perl; -use C4::Context; -use C4::Biblio; -use Test::More tests => 13; + +use Test::More tests => 28; +use DateTime; use Test::MockModule; use Test::Warn; -use DateTime; use XML::Simple; +use YAML; + use t::lib::Mocks; +use C4::Biblio; +use C4::Context; +use Koha::Database; BEGIN { use_ok('Koha::OAI::Server::DeletedRecord'); use_ok('Koha::OAI::Server::Description'); use_ok('Koha::OAI::Server::GetRecord'); use_ok('Koha::OAI::Server::Identify'); + use_ok('Koha::OAI::Server::ListBase'); use_ok('Koha::OAI::Server::ListIdentifiers'); use_ok('Koha::OAI::Server::ListMetadataFormats'); use_ok('Koha::OAI::Server::ListRecords'); @@ -43,52 +47,107 @@ BEGIN { use_ok('Koha::OAI::Server::ResumptionToken'); } +use constant NUMBER_OF_MARC_RECORDS => 10; # Mocked CGI module in order to be able to send CGI parameters to OAI Server my %param; my $module = Test::MockModule->new('CGI'); $module->mock('Vars', sub { %param; }); +my $schema = Koha::Database->schema; +$schema->storage->txn_begin; my $dbh = C4::Context->dbh; -$dbh->{AutoCommit} = 0; -$dbh->{RaiseError} = 1; -$dbh->do('DELETE FROM issues'); + +$dbh->do("SET time_zone='+00:00'"); $dbh->do('DELETE FROM biblio'); -$dbh->do('DELETE FROM biblioitems'); -$dbh->do('DELETE FROM items'); +$dbh->do('DELETE FROM deletedbiblio'); +$dbh->do('DELETE FROM deletedbiblioitems'); +$dbh->do('DELETE FROM deleteditems'); -# Add 10 biblio records -my @bibs = map { +my $date_added = DateTime->now() . 'Z'; +my $date_to = substr($date_added, 0, 10) . 'T23:59:59Z'; +my (@header, @marcxml, @oaidc); +my $sth = $dbh->prepare('SELECT timestamp FROM biblioitems WHERE biblionumber=?'); + +# Add biblio records +foreach my $index ( 0 .. NUMBER_OF_MARC_RECORDS - 1 ) { my $record = MARC::Record->new(); - $record->append_fields( MARC::Field->new('245', '', '', 'a' => "Title $_" ) ); + $record->append_fields( MARC::Field->new('245', '', '', 'a' => "Title $index" ) ); my ($biblionumber) = AddBiblio($record, ''); - $biblionumber; -} (1..10); - -t::lib::Mocks::mock_preference('LibraryName', 'My Library'); -t::lib::Mocks::mock_preference('OAI::PMH', 1); -t::lib::Mocks::mock_preference('OAI-PMH:archiveID', 'TEST'); -t::lib::Mocks::mock_preference('OAI-PMH:ConfFile', '' ); -t::lib::Mocks::mock_preference('OAI-PMH:MaxCount', 3); -t::lib::Mocks::mock_preference('OAI-PMH:DeletedRecord', 'persistent'); - -%param = ( verb => 'ListMetadataFormats' ); -my $response; -my $get_response = sub { - my $stdout; - local *STDOUT; - open STDOUT, '>', \$stdout; - Koha::OAI::Server::Repository->new(); - $response = XMLin($stdout); + $sth->execute($biblionumber); + my $timestamp = $sth->fetchrow_array . 'Z'; + $timestamp =~ s/ /T/; + $timestamp = manipulate_timestamp( $index, $biblionumber, $timestamp ); + $record = GetMarcBiblio($biblionumber); + $record = XMLin($record->as_xml_record); + push @header, { datestamp => $timestamp, identifier => "TEST:$biblionumber" }; + push @oaidc, { + header => $header[$index], + metadata => { + 'oai_dc:dc' => { + 'dc:title' => "Title $index", + 'dc:language' => {}, + 'dc:type' => {}, + 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', + 'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/', + 'xmlns:dc' => 'http://purl.org/dc/elements/1.1/', + 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd', + }, + }, + }; + push @marcxml, { + header => $header[$index], + metadata => { + record => $record, + }, + }; +} + +my $syspref = { + 'LibraryName' => 'My Library', + 'OAI::PMH' => 1, + 'OAI-PMH:archiveID' => 'TEST', + 'OAI-PMH:ConfFile' => '', + 'OAI-PMH:MaxCount' => 3, + 'OAI-PMH:DeletedRecord' => 'persistent', }; -$get_response->(); -my $now = DateTime->now . 'Z'; -my $expected = { - request => 'http://localhost', - responseDate => $now, - xmlns => 'http://www.openarchives.org/OAI/2.0/', - 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd', +while ( my ($name, $value) = each %$syspref ) { + t::lib::Mocks::mock_preference( $name => $value ); +} + +sub test_query { + my ($test, $param, $expected) = @_; + + %param = %$param; + my %full_expected = ( + %$expected, + ( + request => 'http://localhost', + responseDate => DateTime->now . 'Z', + xmlns => 'http://www.openarchives.org/OAI/2.0/', + 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', + 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd', + ) + ); + + my $response; + { + my $stdout; + local *STDOUT; + open STDOUT, '>', \$stdout; + Koha::OAI::Server::Repository->new(); + $response = XMLin($stdout); + } + + unless (is_deeply($response, \%full_expected, $test)) { + diag + "PARAM:" . Dump($param) . + "EXPECTED:" . Dump(\%full_expected) . + "RESPONSE:" . Dump($response); + } +} + +test_query('ListMetadataFormats', {verb => 'ListMetadataFormats'}, { ListMetadataFormats => { metadataFormat => [ { @@ -96,6 +155,11 @@ my $expected = { metadataPrefix=> 'oai_dc', schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', }, + { + metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim', + metadataPrefix => 'marc21', + schema => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', + }, { metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim', metadataPrefix => 'marcxml', @@ -103,23 +167,176 @@ my $expected = { }, ], }, -}; -is_deeply($response, $expected, "ListMetadataFormats"); - -%param = ( verb => 'ListIdentifiers' ); -$get_response->(); -$now = DateTime->now . 'Z'; -$expected = { - request => 'http://localhost', - responseDate => $now, - xmlns => 'http://www.openarchives.org/OAI/2.0/', - 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd', +}); + +test_query('ListIdentifiers without metadataPrefix', {verb => 'ListIdentifiers'}, { error => { code => 'badArgument', content => "Required argument 'metadataPrefix' was undefined", }, -}; -is_deeply($response, $expected, "ListIdentifiers without metadaPrefix argument"); +}); + +test_query('ListIdentifiers', {verb => 'ListIdentifiers', metadataPrefix => 'marcxml'}, { + ListIdentifiers => { + header => [ @header[0..2] ], + resumptionToken => { + content => "marcxml/3/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 3, + }, + }, +}); + +test_query('ListIdentifiers', {verb => 'ListIdentifiers', metadataPrefix => 'marcxml'}, { + ListIdentifiers => { + header => [ @header[0..2] ], + resumptionToken => { + content => "marcxml/3/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 3, + }, + }, +}); -$dbh->rollback; +test_query( + 'ListIdentifiers with resumptionToken 1', + { verb => 'ListIdentifiers', resumptionToken => "marcxml/3/1970-01-01T00:00:00Z/$date_to//0/0" }, + { + ListIdentifiers => { + header => [ @header[3..5] ], + resumptionToken => { + content => "marcxml/6/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 6, + }, + }, + }, +); + +test_query( + 'ListIdentifiers with resumptionToken 2', + { verb => 'ListIdentifiers', resumptionToken => "marcxml/6/1970-01-01T00:00:00Z/$date_to//0/0" }, + { + ListIdentifiers => { + header => [ @header[6..8] ], + resumptionToken => { + content => "marcxml/9/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 9, + }, + }, + }, +); + +test_query( + 'ListIdentifiers with resumptionToken 3, response without resumption', + { verb => 'ListIdentifiers', resumptionToken => "marcxml/9/1970-01-01T00:00:00Z/$date_to//0/0" }, + { + ListIdentifiers => { + header => $header[9], + }, + }, +); + +test_query('ListRecords marcxml without metadataPrefix', {verb => 'ListRecords'}, { + error => { + code => 'badArgument', + content => "Required argument 'metadataPrefix' was undefined", + }, +}); + +test_query('ListRecords marcxml', {verb => 'ListRecords', metadataPrefix => 'marcxml'}, { + ListRecords => { + record => [ @marcxml[0..2] ], + resumptionToken => { + content => "marcxml/3/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 3, + }, + }, +}); + +test_query( + 'ListRecords marcxml with resumptionToken 1', + { verb => 'ListRecords', resumptionToken => "marcxml/3/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => [ @marcxml[3..5] ], + resumptionToken => { + content => "marcxml/6/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 6, + }, + }, +}); + +test_query( + 'ListRecords marcxml with resumptionToken 2', + { verb => 'ListRecords', resumptionToken => "marcxml/6/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => [ @marcxml[6..8] ], + resumptionToken => { + content => "marcxml/9/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 9, + }, + }, +}); + +# Last record, so no resumption token +test_query( + 'ListRecords marcxml with resumptionToken 3, response without resumption', + { verb => 'ListRecords', resumptionToken => "marcxml/9/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => $marcxml[9], + }, +}); + +test_query('ListRecords oai_dc', {verb => 'ListRecords', metadataPrefix => 'oai_dc'}, { + ListRecords => { + record => [ @oaidc[0..2] ], + resumptionToken => { + content => "oai_dc/3/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 3, + }, + }, +}); + +test_query( + 'ListRecords oai_dc with resumptionToken 1', + { verb => 'ListRecords', resumptionToken => "oai_dc/3/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => [ @oaidc[3..5] ], + resumptionToken => { + content => "oai_dc/6/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 6, + }, + }, +}); + +test_query( + 'ListRecords oai_dc with resumptionToken 2', + { verb => 'ListRecords', resumptionToken => "oai_dc/6/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => [ @oaidc[6..8] ], + resumptionToken => { + content => "oai_dc/9/1970-01-01T00:00:00Z/$date_to//0/0", + cursor => 9, + }, + }, +}); + +# Last record, so no resumption token +test_query( + 'ListRecords oai_dc with resumptionToken 3, response without resumption', + { verb => 'ListRecords', resumptionToken => "oai_dc/9/1970-01-01T00:00:00Z/$date_to//0/0" }, + { ListRecords => { + record => $oaidc[9], + }, +}); + +$schema->storage->txn_rollback; + +sub manipulate_timestamp { +# This eliminates waiting a few seconds in order to get a higher timestamp +# Works only for 60 records.. + my ( $index, $bibno, $timestamp ) = @_; + return $timestamp if $timestamp !~ /\d{2}Z/; + my $secs = sprintf( "%02d", $index ); + $timestamp =~ s/\d{2}Z/${secs}Z/; + $dbh->do("UPDATE biblioitems SET timestamp=? WHERE biblionumber=?", undef, + ( $timestamp, $bibno )); + return $timestamp; +} -- 2.39.5