From f2577300505a3a6053743c80bd54b2cb1a625ec6 Mon Sep 17 00:00:00 2001 From: Frederic Demians Date: Thu, 26 Mar 2009 21:54:24 +0100 Subject: [PATCH] Add another OAI server This OAI server has those characteristics: * It doesn't use C4::OAI anymore. * It uses HTTP::OAI library which is easily expandable. * XML result is not build anymore directly but using XML::SAX::Writer. It should solve encoding and XML related issues. * It returns two formats: oai_dc and marcxml. * DC is generated from biblioitems.marcxml by applying XSLT (MARC21slim2OAIDC.xsl or UNIMARCslim2OAIDC.xsl). * It uses the same tokens as previous OAI server (thanks Philippe Jaillon). * There is no more sets (it wasn't operational in previous version). Add two new dependencies: * HTTP::OAI * XML::SAX::Writer available as a Debian package: libxml-sax-writer-perl Signed-off-by: Galen Charlton --- opac/oai2.pl | 471 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 471 insertions(+) create mode 100755 opac/oai2.pl diff --git a/opac/oai2.pl b/opac/oai2.pl new file mode 100755 index 0000000000..37b9f6f66e --- /dev/null +++ b/opac/oai2.pl @@ -0,0 +1,471 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use diagnostics; + +use CGI qw/:standard -oldstyle_urls/; +use vars qw( $GZIP ); +use C4::Context; + + +BEGIN { + eval { require PerlIO::gzip }; + $GZIP = $@ ? 0 : 1; +} + +unless ( C4::Context->preference('OAI-PMH') ) { + print + header( + -type => 'text/plain; charset=utf-8', + -charset => 'utf-8', + -status => '404 OAI-PMH service is disabled', + ), + "OAI-PMH service is disabled"; + exit; +} + +my @encodings = http('HTTP_ACCEPT_ENCODING'); +if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) { + print header( + -type => 'text/xml; charset=utf-8', + -charset => 'utf-8', + -Content-Encoding => 'gzip', + ); + binmode( STDOUT, ":gzip" ); +} +else { + print header( + -type => 'text/xml; charset=utf-8', + -charset => 'utf-8', + ); +} + +binmode( STDOUT, ":utf8" ); +my $repository = C4::OAI::Repository->new(); + +# __END__ Main Prog + + +# +# Extends HTTP::OAI::ResumptionToken +# A token is identified by: +# - metadataPrefix +# - from +# - until +# - offset +# +package C4::OAI::ResumptionToken; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; + +use base ("HTTP::OAI::ResumptionToken"); + + +sub new { + my ($class, %args) = @_; + + my $self = $class->SUPER::new(%args); + + my ($metadata_prefix, $offset, $from, $until); + if ( $args{ resumptionToken } ) { + ($metadata_prefix, $offset, $from, $until) + = split( ':', $args{resumptionToken} ); + } + else { + $metadata_prefix = $args{ metadataPrefix }; + $from = $args{ from } || '1970-01-01'; + $until = $args{ until }; + unless ( $until) { + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time ); + $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday ); + } + $offset = $args{ offset } || 0; + } + + $self->{ metadata_prefix } = $metadata_prefix; + $self->{ offset } = $offset; + $self->{ from } = $from; + $self->{ until } = $until; + + $self->resumptionToken( + join( ':', $metadata_prefix, $offset, $from, $until ) ); + $self->cursor( $offset ); + + return $self; +} + +# __END__ C4::OAI::ResumptionToken + + + +package C4::OAI::Identify; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; +use C4::Context; + +use base ("HTTP::OAI::Identify"); + +sub new { + my ($class, $repository) = @_; + + my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/; + my $self = $class->SUPER::new( + baseURL => $baseURL, + repositoryName => C4::Context->preference("LibraryName"), + adminEmail => C4::Context->preference("KohaAdminEmailAddress"), + MaxCount => C4::Context->preference("OAI-PMH:MaxCount"), + granularity => 'YYYY-MM-DD', + earliestDatestamp => '0001-01-01', + ); + $self->description( "Koha OAI Repository" ); + $self->compression( 'gzip' ); + + return $self; +} + +# __END__ C4::OAI::Identify + + + +package C4::OAI::ListMetadataFormats; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; + +use base ("HTTP::OAI::ListMetadataFormats"); + +sub new { + my ($class, $repository) = @_; + + my $self = $class->SUPER::new(); + + $self->metadataFormat( HTTP::OAI::MetadataFormat->new( + metadataPrefix => 'oai_dc', + schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', + metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/' + ) ); + $self->metadataFormat( HTTP::OAI::MetadataFormat->new( + metadataPrefix => 'marcxml', + schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd', + metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim' + ) ); + + return $self; +} + +# __END__ C4::OAI::ListMetadataFormats + + + +package C4::OAI::Record; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; +use HTTP::OAI::Metadata::OAI_DC; + +use base ("HTTP::OAI::Record"); + +sub new { + my ($class, $repository, $marcxml, $timestamp, %args) = @_; + + my $self = $class->SUPER::new(%args); + + $timestamp =~ s/ /T/, $timestamp .= 'Z'; + $self->header( new HTTP::OAI::Header( + identifier => $args{identifier}, + datestamp => $timestamp, + ) ); + + my $parser = XML::LibXML->new(); + my $record_dom = $parser->parse_string( $marcxml ); + if ( $args{metadataPrefix} ne 'marcxml' ) { + $record_dom = $repository->oai_dc_stylesheet()->transform( $record_dom ); + } + $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) ); + + return $self; +} + +# __END__ C4::OAI::Record + + + +package C4::OAI::GetRecord; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; + +use base ("HTTP::OAI::GetRecord"); + + +sub new { + my ($class, $repository, %args) = @_; + + my $self = HTTP::OAI::GetRecord->new(%args); + + my $dbh = C4::Context->dbh; + my $sth = $dbh->prepare(" + SELECT marcxml, timestamp + FROM biblioitems + WHERE biblionumber=? " ); + my $prefix = $repository->{koha_identifier} . ':'; + my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/; + $sth->execute( $biblionumber ); + my ($marcxml, $timestamp); + unless ( ($marcxml, $timestamp) = $sth->fetchrow ) { + return HTTP::OAI::Response->new( + requestURL => $repository->self_url(), + errors => [ new HTTP::OAI::Error( + code => 'idDoesNotExist', + message => "There is no biblio record with this identifier", + ) ] , + ); + } + + #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) ); + $self->record( C4::OAI::Record->new( + $repository, $marcxml, $timestamp, %args ) ); + + return $self; +} + +# __END__ C4::OAI::GetRecord + + + +package C4::OAI::ListIdentifiers; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; + +use base ("HTTP::OAI::ListIdentifiers"); + + +sub new { + my ($class, $repository, %args) = @_; + + my $self = HTTP::OAI::ListIdentifiers->new(%args); + + my $token = new C4::OAI::ResumptionToken( %args ); + my $dbh = C4::Context->dbh; + my $sql = "SELECT biblionumber, timestamp + FROM biblioitems + WHERE timestamp >= ? AND timestamp <= ? + LIMIT " . $repository->{koha_max_count} . " + OFFSET " . $token->{offset}; + my $sth = $dbh->prepare( $sql ); + $sth->execute( $token->{from}, $token->{until} ); + + my $pos = $token->{offset}; + while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) { + $timestamp =~ s/ /T/, $timestamp .= 'Z'; + $self->identifier( new HTTP::OAI::Header( + identifier => $repository->{ koha_identifier} . ':' . $biblionumber, + datestamp => $timestamp, + ) ); + $pos++; + } + $self->resumptionToken( new C4::OAI::ResumptionToken( + metadataPrefix => $token->{metadata_prefix}, + from => $token->{from}, + until => $token->{until}, + offset => $pos ) ); + + return $self; +} + +# __END__ C4::OAI::ListIdentifiers + + + +package C4::OAI::ListRecords; + +use strict; +use warnings; +use diagnostics; +use HTTP::OAI; + +use base ("HTTP::OAI::ListRecords"); + + +sub new { + my ($class, $repository, %args) = @_; + + my $self = HTTP::OAI::ListRecords->new(%args); + + my $token = new C4::OAI::ResumptionToken( %args ); + my $dbh = C4::Context->dbh; + my $sql = "SELECT biblionumber, marcxml, timestamp + FROM biblioitems + WHERE timestamp >= ? AND timestamp <= ? + LIMIT " . $repository->{koha_max_count} . " + OFFSET " . $token->{offset}; + my $sth = $dbh->prepare( $sql ); + $sth->execute( $token->{from}, $token->{until} ); + + my $pos = $token->{offset}; + while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) { + $self->record( C4::OAI::Record->new( + $repository, $marcxml, $timestamp, + identifier => $repository->{ koha_identifier } . ':' . $biblionumber, + metadataPrefix => $token->{metadata_prefix} + ) ); + $pos++; + } + $self->resumptionToken( new C4::OAI::ResumptionToken( + metadataPrefix => $token->{metadata_prefix}, + from => $token->{from}, + until => $token->{until}, + offset => $pos ) ); + + return $self; +} + +# __END__ C4::OAI::ListRecords + + + +package C4::OAI::Repository; + +use base ("HTTP::OAI::Repository"); + +use strict; +use warnings; +use diagnostics; + +use HTTP::OAI; +use HTTP::OAI::Repository qw/:validate/; + +use XML::SAX::Writer; +use XML::LibXML; +use XML::LibXSLT; +use CGI qw/:standard -oldstyle_urls/; + +use C4::Context; +use C4::Biblio; + + +=head1 NAME + +C4::OAI::Repository - Handles OAI-PMH requests for a Koha database. + +=head1 SYNOPSIS + + use C4::OAI::Repository; + + my $repository = C4::OAI::Repository->new(); + +=head1 DESCRIPTION + +This object extend HTTP::OAI::Repository object. + +=cut + + + +sub new { + my ($class, %args) = @_; + my $self = $class->SUPER::new(%args); + + $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID"); + $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount"); + $self->{ koha_metadata_format } = ['oai_dc', 'marcxml']; + + # Check for grammatical errors in the request + my @errs = validate_request( CGI::Vars() ); + + # Is metadataPrefix supported by the respository? + my $mdp = param('metadataPrefix') || ''; + if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) { + push @errs, new HTTP::OAI::Error( + code => 'cannotDisseminateFormat', + message => "Dissemination as '$mdp' is not supported", + ); + } + + my $response; + if ( @errs ) { + $response = HTTP::OAI::Response->new( + requestURL => self_url(), + errors => \@errs, + ); + } + else { + my %attr = CGI::Vars(); + my $verb = delete( $attr{verb} ); + if ( grep { $_ eq $verb } qw( ListSets ) ) { + $response = HTTP::OAI::Response->new( + requestURL => $self->self_url(), + errors => [ new HTTP::OAI::Error( + code => 'noSetHierarchy', + message => "Koha repository doesn't have sets", + ) ] , + ); + } + elsif ( $verb eq 'Identify' ) { + $response = C4::OAI::Identify->new( $self ); + } + elsif ( $verb eq 'ListMetadataFormats' ) { + $response = C4::OAI::ListMetadataFormats->new( $self ); + } + elsif ( $verb eq 'GetRecord' ) { + $response = C4::OAI::GetRecord->new( $self, %attr ); + } + elsif ( $verb eq 'ListRecords' ) { + $response = C4::OAI::ListRecords->new( $self, %attr ); + } + elsif ( $verb eq 'ListIdentifiers' ) { + $response = C4::OAI::ListIdentifiers->new( $self, %attr ); + } + } + + $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) ); + $response->generate; + + bless $self, $class; + return $self; +} + + +# +# XSLT stylesheet used to transform MARCXML record into OAI Dublin Core. +# The object is constructed the fist time this method is called. +# +# Styleeet file is located in /koha-tmpl/intranet-tmpl/prog/en/xslt/ directory. +# Its name is constructed with 'marcflavour' syspref: +# - MARC21slim2OAIDC.xsl +# - UNIMARCslim2OADIC.xsl +# +sub oai_dc_stylesheet { + my $self = shift; + + unless ( $self->{ oai_dc_stylesheet } ) { + my $xslt_file = C4::Context->config('intranetdir') . + "/koha-tmpl/intranet-tmpl/prog/en/xslt/" . + C4::Context->preference('marcflavour') . + "slim2OAIDC.xsl"; + my $parser = XML::LibXML->new(); + my $xslt = XML::LibXSLT->new(); + my $style_doc = $parser->parse_file( $xslt_file ); + my $stylesheet = $xslt->parse_stylesheet( $style_doc ); + $self->{ oai_dc_stylesheet } = $stylesheet; + } + + return $self->{ oai_dc_stylesheet }; +} + -- 2.39.5