3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
# Script entry point: refuses the request when OAI-PMH is disabled, picks
# gzip or plain XML output, then creates the repository object.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
# Loaded inside eval so that a missing PerlIO::gzip does not abort the script.
30 eval { require PerlIO::gzip };
# When the 'OAI-PMH' system preference is false, the visible header arguments
# describe a text/plain response carrying a 404 status.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# Values of the client's Accept-Encoding header.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
# gzip output requires both $GZIP to be true and an entry equal to 'gzip'.
# NOTE(review): $GZIP is presumably set from the eval above; its assignment
# is not visible here. An entry such as 'gzip;q=1.0' would not pass the exact
# string comparison - confirm how http() splits the header.
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Put the :gzip layer on STDOUT so the response body is compressed.
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Add a UTF-8 encoding layer to STDOUT.
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
# Resumption token used by the list verbs. The visible code keeps five parts
# (metadata prefix, offset, from, until, set) and serialises them, in that
# order, as a '/'-joined string.
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
# Constructor: arguments arrive as a key/value list after the class name.
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
# An existing token string is split on '/' back into its five parts.
# NOTE(review): the callers pass request parameters (CGI::Vars) in %args, and
# the parts are not validated in the visible lines, yet 'offset' is later
# interpolated into SQL by ListIdentifiers/ListRecords - confirm it is
# checked somewhere.
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
# Presumably the alternative branch (its opening line is not visible): the
# parts are taken from the individual arguments. 'from' falls back to
# '1970-01-01' when absent or false.
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
# Current UTC date formatted as YYYY-MM-DD.
# NOTE(review): presumably applied only when no 'until' was given; the guard
# line is not visible here.
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
# A 10-character value is treated as a bare date: 'from' gets the start of
# the day and 'until' the end of the day.
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
# Offset defaults to 0 when absent or false.
105 $offset = $args{ offset } || 0;
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
# from_arg/until_arg hold the same values with 'T' turned into a space and
# 'Z' removed; the list verbs bind these to their timestamp placeholders.
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
# The token string is rebuilt from the five parts; the cursor is the offset.
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
# Replaces every 'T' in the given timestamp with a space and deletes every
# 'Z'. The return statement is not visible here; the callers above use the
# result as the converted string.
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
# Response object for the Identify verb, filled from Koha system preferences
# and fixed values.
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
# Constructor: takes the class name and the repository object.
145 my ($class, $repository) = @_;
# Base URL is everything before the last '?' of the repository's own URL.
# NOTE(review): when self_url() contains no '?', the match fails and
# $baseURL is undef - confirm that case cannot occur.
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
# NOTE(review): granularity is declared as whole days, while C4::OAI::Record
# and ListIdentifiers emit datestamps with a 'T'/'Z' time part - confirm this
# is consistent with what harvesters expect.
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
# Advertise gzip as a supported compression.
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
# Response object for the ListMetadataFormats verb.
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
# Constructor: takes the class name and the repository object.
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
# With a loaded configuration, one format entry is added for each name in
# koha_metadata_format, using the fields of conf->{format}->{name}.
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
# Presumably the branch without a configuration (its opening line is not
# visible): the two built-in formats, oai_dc and marcxml.
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# NOTE(review): both marcxml strings below contain a space after
# 'http://www.loc.gov/' and carry two URLs each; the sample configuration in
# this file's POD has no such space - verify the intended values.
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
# One OAI record: a header (identifier, datestamp, set specs) plus metadata
# built from a MARCXML string.
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
# Constructor: positional repository, MARCXML string, timestamp and array
# reference of set specs, followed by key/value arguments. 'identifier' and
# 'metadataPrefix' are the keys read below.
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
# Turn the timestamp's first space into 'T' and append 'Z'.
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
# Attach every set spec to the header.
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
# Any format other than 'marcxml' goes through the repository's stylesheet
# for that format; the OPACBaseURL preference is supplied wrapped in single
# quotes.
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
# Response object for the GetRecord verb: looks up one biblio record by the
# number embedded in the requested identifier.
257 package C4::OAI::GetRecord;
264 use base ("HTTP::OAI::GetRecord");
# Constructor: class name, repository object, then the request arguments.
268 my ($class, $repository, %args) = @_;
270 my $self = HTTP::OAI::GetRecord->new(%args);
272 my $dbh = C4::Context->dbh;
273 my $sth = $dbh->prepare("
274 SELECT marcxml, timestamp
276 WHERE biblionumber=? " );
# The identifier is expected to be '<koha_identifier>:<biblionumber>'.
# NOTE(review): $prefix is interpolated into the pattern without \Q...\E, so
# regex metacharacters in the archive ID (for example '.') are not matched
# literally. $biblionumber is undef when the identifier lacks the prefix.
277 my $prefix = $repository->{koha_identifier} . ':';
278 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
279 $sth->execute( $biblionumber );
280 my ($marcxml, $timestamp);
# No row: answer with an idDoesNotExist error response instead.
281 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
282 return HTTP::OAI::Response->new(
283 requestURL => $repository->self_url(),
284 errors => [ new HTTP::OAI::Error(
285 code => 'idDoesNotExist',
286 message => "There is no biblio record with this identifier",
# Collect the 'spec' of every OAI set returned for this biblio.
291 my $oai_sets = GetOAISetsBiblio($biblionumber);
293 foreach (@$oai_sets) {
294 push @setSpecs, $_->{spec};
297 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
298 $self->record( C4::OAI::Record->new(
299 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
304 # __END__ C4::OAI::GetRecord
# Response object for the ListIdentifiers verb: one header per biblio whose
# timestamp lies in the token's from/until range, optionally limited to a set.
308 package C4::OAI::ListIdentifiers;
315 use base ("HTTP::OAI::ListIdentifiers");
# Constructor: class name, repository object, then the request arguments.
319 my ($class, $repository, %args) = @_;
321 my $self = HTTP::OAI::ListIdentifiers->new(%args);
323 my $token = new C4::OAI::ResumptionToken( %args );
324 my $dbh = C4::Context->dbh;
# Resolve the set spec held by the token, when there is one.
326 if(defined $token->{'set'}) {
327 $set = GetOAISetBySpec($token->{'set'});
# The query asks for one row more than koha_max_count; the loop below treats
# that extra row as the sign that a further page exists.
# NOTE(review): the date range and set id are bound through placeholders, but
# the token's offset is interpolated into the SQL text. The offset can
# originate from the request's resumptionToken - confirm it is validated as
# an integer before reaching this point.
329 my $max = $repository->{koha_max_count};
331 SELECT biblioitems.biblionumber, biblioitems.timestamp
334 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
335 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
336 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
338 LIMIT " . ($max+1) . "
339 OFFSET $token->{offset}
341 my $sth = $dbh->prepare( $sql );
342 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
343 push @bind_params, $set->{'id'} if defined $set;
344 $sth->execute( @bind_params );
# Once $count exceeds $max (its increment is not visible here), a resumption
# token pointing $max rows further is attached instead of another header.
347 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
349 if ( $count > $max ) {
350 $self->resumptionToken(
351 new C4::OAI::ResumptionToken(
352 metadataPrefix => $token->{metadata_prefix},
353 from => $token->{from},
354 until => $token->{until},
355 offset => $token->{offset} + $max,
# Turn the timestamp's first space into 'T' and append 'Z'.
361 $timestamp =~ s/ /T/, $timestamp .= 'Z';
362 $self->identifier( new HTTP::OAI::Header(
363 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
364 datestamp => $timestamp,
371 # __END__ C4::OAI::ListIdentifiers
# Holder for one set description that is written out as a 'setDescription'
# element through a SAX handler.
373 package C4::OAI::Description;
378 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor (presumably; the sub line is not visible): keeps the
# 'setDescription' and 'handler' arguments when they are true.
381 my ( $class, %args ) = @_;
385 if(my $setDescription = $args{setDescription}) {
386 $self->{setDescription} = $setDescription;
388 if(my $handler = $args{handler}) {
389 $self->{handler} = $handler;
# Handler setter: replaces the stored handler only when a true value is given.
397 my ( $self, $handler ) = @_;
399 $self->{handler} = $handler if $handler;
# Emits the stored description as a 'setDescription' element in the OAI-PMH
# 2.0 namespace, with an empty attribute hash, via the stored handler.
407 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
412 # __END__ C4::OAI::Description
# Response object for the ListSets verb, paged by the token's offset.
414 package C4::OAI::ListSets;
421 use base ("HTTP::OAI::ListSets");
# Constructor: class name, repository object, then the request arguments.
424 my ( $class, $repository, %args ) = @_;
426 my $self = HTTP::OAI::ListSets->new(%args);
# Only the token's offset and metadata_prefix are read in the visible lines.
428 my $token = C4::OAI::ResumptionToken->new(%args);
429 my $sets = GetOAISets;
# Sets positioned before the token's offset get separate handling
# (presumably skipped; that statement is not visible here).
431 foreach my $set (@$sets) {
432 if ($pos < $token->{offset}) {
# Wrap each description of the set in a C4::OAI::Description object.
437 foreach my $desc (@{$set->{'descriptions'}}) {
438 push @descriptions, C4::OAI::Description->new(
439 setDescription => $desc,
444 setSpec => $set->{'spec'},
445 setName => $set->{'name'},
446 setDescription => \@descriptions,
# Leave the loop when ($pos + 1 - offset) exceeds koha_max_count.
450 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached only when $pos ended beyond the starting
# offset.
# NOTE(review): confirm this does not also attach a token after the last
# page has been delivered.
453 $self->resumptionToken(
454 new C4::OAI::ResumptionToken(
455 metadataPrefix => $token->{metadata_prefix},
458 ) if ( $pos > $token->{offset} );
463 # __END__ C4::OAI::ListSets;
# Response object for the ListRecords verb: one full record per biblio whose
# timestamp lies in the token's from/until range, optionally limited to a set.
465 package C4::OAI::ListRecords;
472 use base ("HTTP::OAI::ListRecords");
# Constructor: class name, repository object, then the request arguments.
476 my ($class, $repository, %args) = @_;
478 my $self = HTTP::OAI::ListRecords->new(%args);
480 my $token = new C4::OAI::ResumptionToken( %args );
481 my $dbh = C4::Context->dbh;
# Resolve the set spec held by the token, when there is one.
483 if(defined $token->{'set'}) {
484 $set = GetOAISetBySpec($token->{'set'});
# The query asks for one row more than koha_max_count; the loop below treats
# that extra row as the sign that a further page exists.
# NOTE(review): the date range and set id are bound through placeholders, but
# the token's offset is interpolated into the SQL text. The offset can
# originate from the request's resumptionToken - confirm it is validated as
# an integer before reaching this point.
486 my $max = $repository->{koha_max_count};
488 SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
491 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
492 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
493 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
495 LIMIT " . ($max + 1) . "
496 OFFSET $token->{offset}
499 my $sth = $dbh->prepare( $sql );
500 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
501 push @bind_params, $set->{'id'} if defined $set;
502 $sth->execute( @bind_params );
# Once $count exceeds $max (its increment is not visible here), a resumption
# token pointing $max rows further is attached instead of another record.
505 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
507 if ( $count > $max ) {
508 $self->resumptionToken(
509 new C4::OAI::ResumptionToken(
510 metadataPrefix => $token->{metadata_prefix},
511 from => $token->{from},
512 until => $token->{until},
513 offset => $token->{offset} + $max,
# Collect the 'spec' of every OAI set returned for this biblio.
519 my $oai_sets = GetOAISetsBiblio($biblionumber);
521 foreach (@$oai_sets) {
522 push @setSpecs, $_->{spec};
# The record identifier is '<koha_identifier>:<biblionumber>'.
524 $self->record( C4::OAI::Record->new(
525 $repository, $marcxml, $timestamp, \@setSpecs,
526 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
527 metadataPrefix => $token->{metadata_prefix}
534 # __END__ C4::OAI::ListRecords
# Repository object: reads its settings, validates the incoming request,
# builds the response object for the requested verb and writes it to STDOUT.
538 package C4::OAI::Repository;
540 use base ("HTTP::OAI::Repository");
546 use HTTP::OAI::Repository qw/:validate/;
548 use XML::SAX::Writer;
551 use YAML::Syck qw( LoadFile );
552 use CGI qw/:standard -oldstyle_urls/;
# Constructor: settings come from system preferences; the supported formats
# default to oai_dc and marcxml; stylesheets are cached per format.
559 my ($class, %args) = @_;
560 my $self = $class->SUPER::new(%args);
562 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
563 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
564 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
565 $self->{ koha_stylesheet } = { }; # Build when needed
567 # Load configuration file if defined in OAI-PMH:ConfFile syspref
# The supported formats then become the keys of the file's 'format' mapping.
# NOTE(review): the metadataPrefix check below compares the request value
# with these keys, while ListMetadataFormats advertises each entry's own
# metadataPrefix field - confirm that keys and metadataPrefix values always
# agree.
568 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
569 $self->{ conf } = LoadFile( $file );
570 my @formats = keys %{ $self->{conf}->{format} };
571 $self->{ koha_metadata_format } = \@formats;
574 # Check for grammatical errors in the request
575 my @errs = validate_request( CGI::Vars() );
577 # Is metadataPrefix supported by the repository?
578 my $mdp = param('metadataPrefix') || '';
579 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
580 push @errs, new HTTP::OAI::Error(
581 code => 'cannotDisseminateFormat',
582 message => "Dissemination as '$mdp' is not supported",
# Generic response (presumably used when @errs is not empty; the condition
# line is not visible here).
588 $response = HTTP::OAI::Response->new(
589 requestURL => self_url(),
# Otherwise the verb selects the response class; the remaining request
# parameters are passed on to the classes that take arguments.
594 my %attr = CGI::Vars();
595 my $verb = delete( $attr{verb} );
596 if ( $verb eq 'ListSets' ) {
597 $response = C4::OAI::ListSets->new($self, %attr);
599 elsif ( $verb eq 'Identify' ) {
600 $response = C4::OAI::Identify->new( $self );
602 elsif ( $verb eq 'ListMetadataFormats' ) {
603 $response = C4::OAI::ListMetadataFormats->new( $self );
605 elsif ( $verb eq 'GetRecord' ) {
606 $response = C4::OAI::GetRecord->new( $self, %attr );
608 elsif ( $verb eq 'ListRecords' ) {
609 $response = C4::OAI::ListRecords->new( $self, %attr );
611 elsif ( $verb eq 'ListIdentifiers' ) {
612 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
# The response is serialised through a SAX writer bound to STDOUT.
616 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
# Stylesheet lookup for a format (called as $repository->stylesheet($format)
# by C4::OAI::Record): the compiled stylesheet is taken from the per-format
# cache or, on first use, parsed from the XSL file and stored there. The file
# path comes from the configuration entry when a configuration is loaded,
# otherwise from a path built with the 'intrahtdocs' setting and the
# 'marcflavour' preference.
625 my ( $self, $format ) = @_;
627 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
628 unless ( $stylesheet ) {
629 my $xsl_file = $self->{ conf }
630 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
631 : ( C4::Context->config('intrahtdocs') .
633 C4::Context->preference('marcflavour') .
635 my $parser = XML::LibXML->new();
636 my $xslt = XML::LibXSLT->new();
637 my $style_doc = $parser->parse_file( $xsl_file );
638 $stylesheet = $xslt->parse_stylesheet( $style_doc );
639 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
649 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
653 use C4::OAI::Repository;
655 my $repository = C4::OAI::Repository->new();
659 This object extends the HTTP::OAI::Repository object.
660 It accepts OAI-PMH HTTP requests and returns the result.
662 This OAI-PMH server can operate in a simple mode and an extended one.
664 In simple mode, repository configuration comes entirely from Koha system
665 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
666 records in marcxml or Dublin Core format. Dublin Core records are created from
667 Koha marcxml records transformed with XSLT. The XSL file used is located in the
668 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour,
669 respectively MARC21slim2OAIDC.xsl for MARC21 and UNIMARCslim2OAIDC.xsl for
672 In extended mode, it's possible to configure formats other than marcxml or Dublin
673 Core. A new syspref OAI-PMH:ConfFile specifies a YAML configuration file which
674 lists the available metadata formats and the XSL files used to create them from marcxml
675 records. If this syspref isn't set, the Koha OAI server works in simple mode. A
676 configuration file koha-oai.conf can look like this:
682 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
683 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
684 xsl_file: /usr/local/koha/xslt/vs.xsl
686 metadataPrefix: marcxml
687 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
688 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
690 metadataPrefix: oai_dc
691 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
692 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
693 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl