3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
# NOTE(review): this listing omits some original lines (for example the
# header-printing calls, else keywords and closing braces), so the comments
# below describe only the statements that are visible.
# Attempt to load PerlIO::gzip inside eval so that a missing module does not
# abort the script.
30 eval { require PerlIO::gzip };
# When the OAI-PMH system preference is false the visible arguments are a
# plain-text content type, a 404 status line and the message body.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# Values of the Accept-Encoding request header (presumably via the http
# function exported by CGI).
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
# Compress only when $GZIP is true and one value is exactly gzip.
# $GZIP is not assigned in the visible lines; presumably it records whether
# the require above succeeded - TODO confirm.
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Push a gzip layer onto STDOUT so that everything printed is compressed.
52 binmode( STDOUT, ":gzip" );
# Presumably the uncompressed branch: same XML content type, no
# Content-Encoding argument visible.
56 -type => 'text/xml; charset=utf-8',
# Encode everything written to STDOUT as UTF-8.
61 binmode STDOUT, ':encoding(UTF-8)';
# Construct the repository object defined later in this file.
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
# NOTE(review): the list announced by the line above, the sub header of the
# constructor, the else keyword and several closing lines are not present in
# this listing; the comments below cover the visible statements only.
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
# Constructor arguments arrive as a hash; the keys read below are
# resumptionToken, metadataPrefix, from, until and offset.
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
# A supplied token is a slash-separated string holding, in order, the
# metadata prefix, offset, from, until and set.
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
# Presumably the else branch: build the state from the individual request
# arguments, with from defaulting to 1970-01-01.
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
# Current date in UTC formatted as YYYY-MM-DD. The condition guarding this
# assignment is not visible; presumably it applies only when no until value
# was given - TODO confirm.
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
# A length of exactly 10 is taken to mean a date without a time part.
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
105 $offset = $args{ offset } || 0;
# Keep the parsed fields on the object for the list handlers to read.
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
# from_arg and until_arg are the same values with every T replaced by a
# space and every Z removed, presumably to match the database timestamp
# format.
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
# The token string is the five fields joined with slashes, mirroring the
# split above; the cursor is set to the offset.
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
# Private helper: replaces every T with a space and deletes every Z in the
# given timestamp string. The return statement is not visible in this listing.
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
# Identify response of the repository, derived from HTTP::OAI::Identify.
# NOTE(review): the sub header and some argument lines are missing from this
# listing; only the visible statements are described.
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
# Takes the class name and the repository object.
145 my ($class, $repository) = @_;
# The greedy capture keeps everything before the last question mark of the
# request URL. The line that consumes $baseURL is not visible here.
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
# Repository name, admin address, maximum count and deleted-record policy
# are read from Koha system preferences; the policy falls back to no when the
# preference is false. Granularity and earliest datestamp are fixed strings.
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
# Declare gzip as a supported compression in the Identify response.
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
# ListMetadataFormats response, derived from HTTP::OAI::ListMetadataFormats.
# NOTE(review): the sub header, else keyword and closing lines are missing
# from this listing.
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
# Takes the class name and the repository object.
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
# With a loaded configuration, advertise every format named in the
# repository format list, using prefix, schema and namespace from the
# configuration entry of the same name.
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
# Presumably the branch without configuration: advertise the two built-in
# formats oai_dc and marcxml.
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# NOTE(review): both marcxml strings below contain a space inside the second
# URL (after www.loc.gov/), unlike the example configuration in the POD at
# the end of this file - looks like a typo, TODO confirm before changing.
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
# One OAI record with header and metadata, derived from HTTP::OAI::Record.
# NOTE(review): the sub header and some lines are missing from this listing.
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
# Positional arguments: repository object, MARCXML string, timestamp string,
# array reference of set specs; the remaining arguments form a hash from
# which identifier and metadataPrefix are read.
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
# Two expressions joined by the comma operator: turn the first space of the
# timestamp into T, then append Z.
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
# Attach every set spec to the header.
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
# Parse the MARCXML into a DOM.
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
# Any format other than marcxml is produced by running the DOM through the
# stylesheet the repository returns for that format. The OPACBaseURL value is
# wrapped in single quotes; the statement that opens this parameter list is
# not visible (presumably a hash of XSLT parameters - TODO confirm).
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
# Store the resulting DOM as the metadata of the record.
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
# Header-only record used for deleted biblios, derived from HTTP::OAI::Record.
# NOTE(review): the sub header and at least one header argument line are
# missing from this listing.
255 package C4::OAI::DeletedRecord;
259 use HTTP::OAI::Metadata::OAI_DC;
261 use base ("HTTP::OAI::Record");
# Positional arguments: timestamp string and array reference of set specs;
# the remaining arguments form a hash from which identifier is read.
264 my ($class, $timestamp, $setSpecs, %args) = @_;
266 my $self = $class->SUPER::new(%args);
# Turn the first space of the timestamp into T, then append Z.
268 $timestamp =~ s/ /T/, $timestamp .= 'Z';
269 $self->header( new HTTP::OAI::Header(
271 identifier => $args{identifier},
272 datestamp => $timestamp,
# Attach every set spec to the header.
275 foreach my $setSpec (@$setSpecs) {
276 $self->header->setSpec($setSpec);
282 # __END__ C4::OAI::DeletedRecord
# GetRecord response, derived from HTTP::OAI::GetRecord.
# NOTE(review): the sub header, parts of both SQL statements, the variable
# declarations and the condition of the final ternary are missing from this
# listing.
286 package C4::OAI::GetRecord;
295 use base ("HTTP::OAI::GetRecord");
# Takes the class name, the repository object and the request arguments as a
# hash; identifier is read from the hash below.
299 my ($class, $repository, %args) = @_;
301 my $self = HTTP::OAI::GetRecord->new(%args);
# First lookup: a prepared statement keyed on biblionumber (the table is
# named on a line not shown here).
303 my $dbh = C4::Context->dbh;
304 my $sth = $dbh->prepare("
307 WHERE biblionumber=? " );
# The biblionumber is whatever follows the archive identifier and a colon.
# NOTE(review): $prefix is interpolated into the pattern without quoting, so
# regex metacharacters in the archive identifier would be interpreted.
308 my $prefix = $repository->{koha_identifier} . ':';
309 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
310 $sth->execute( $biblionumber );
# When the first query returns no row a second query is tried; when that one
# returns nothing either, an idDoesNotExist error response is returned.
312 unless ( ($timestamp) = $sth->fetchrow ) {
313 unless ( ($timestamp) = $dbh->selectrow_array(q/
316 WHERE biblionumber=? /, undef, $biblionumber ))
318 return HTTP::OAI::Response->new(
319 requestURL => $repository->self_url(),
320 errors => [ new HTTP::OAI::Error(
321 code => 'idDoesNotExist',
322 message => "There is no biblio record with this identifier",
331 # We fetch it using this method, rather than the database directly,
332 # so it'll include the item data
335 my $record = GetMarcBiblio($biblionumber, 1);
336 $marcxml = $record->as_xml();
# Collect the spec of every OAI set returned for this biblio.
338 my $oai_sets = GetOAISetsBiblio($biblionumber);
340 foreach (@$oai_sets) {
341 push @setSpecs, $_->{spec};
344 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
# Either a deleted record or a full record is built; the deciding condition
# is not visible in this listing.
347 ? C4::OAI::DeletedRecord->new($timestamp, \@setSpecs, %args)
348 : C4::OAI::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args);
353 # __END__ C4::OAI::GetRecord
# ListIdentifiers response, derived from HTTP::OAI::ListIdentifiers.
# NOTE(review): the sub header, parts of the SQL text, the counter handling
# and several closing lines are missing from this listing.
357 package C4::OAI::ListIdentifiers;
364 use base ("HTTP::OAI::ListIdentifiers");
# Takes the class name, the repository object and the request arguments.
368 my ($class, $repository, %args) = @_;
370 my $self = HTTP::OAI::ListIdentifiers->new(%args);
# The token object normalises from, until, offset and set from the request.
372 my $token = new C4::OAI::ResumptionToken( %args );
373 my $dbh = C4::Context->dbh;
# Resolve the set spec to a set row when the token carries one.
375 if(defined $token->{'set'}) {
376 $set = GetOAISetBySpec($token->{'set'});
# The query built below has two subselects, one on biblioitems and one on
# deletedbiblio, each optionally joined to oai_sets_biblios and filtered by
# set id, ordered by biblionumber. It asks for one row more than the maximum
# so that the loop can tell whether another page exists.
# NOTE(review): the offset is interpolated into the SQL text and can come
# from a client-supplied resumption token - validate it as an integer.
# NOTE(review): the first subselect compares the raw timestamp column while
# the second wraps it in DATE(), and the bind values differ as well (see the
# bind list) - confirm this asymmetry is intended.
378 my $max = $repository->{koha_max_count};
380 (SELECT biblioitems.biblionumber, biblioitems.timestamp
383 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
384 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
385 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
387 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
388 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
389 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
390 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
392 $sql .= ") ORDER BY biblionumber
393 LIMIT " . ($max+1) . "
394 OFFSET $token->{offset}
396 my $sth = $dbh->prepare( $sql );
# Bind values follow the placeholder order: the stripped from_arg and
# until_arg for the first subselect, the unstripped from and until for the
# second, each followed by the set id when a set is in use.
397 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
398 push @bind_params, $set->{'id'} if defined $set;
399 push @bind_params, ($token->{'from'}, $token->{'until'});
400 push @bind_params, $set->{'id'} if defined $set;
401 $sth->execute( @bind_params );
# Once the counter exceeds the maximum a resumption token pointing at the
# next page (offset plus maximum) is attached. The counter update and the
# loop exit are not visible here.
# NOTE(review): no set argument is visible in the token built below; verify
# that the set is carried over to the next page.
404 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
406 if ( $count > $max ) {
407 $self->resumptionToken(
408 new C4::OAI::ResumptionToken(
409 metadataPrefix => $token->{metadata_prefix},
410 from => $token->{from},
411 until => $token->{until},
412 offset => $token->{offset} + $max,
# Turn the first space of the timestamp into T, append Z, and add a header
# whose identifier is the archive identifier, a colon and the biblionumber.
418 $timestamp =~ s/ /T/, $timestamp .= 'Z';
419 $self->identifier( new HTTP::OAI::Header(
420 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
421 datestamp => $timestamp,
425 # Return error if no results
427 return HTTP::OAI::Response->new(
428 requestURL => $repository->self_url(),
429 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
436 # __END__ C4::OAI::ListIdentifiers
# Wrapper that emits one setDescription element for a set.
# NOTE(review): the sub headers, object creation and return statements are
# missing from this listing, so method names are not visible.
438 package C4::OAI::Description;
443 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor arguments arrive as a hash; setDescription and handler are
# copied onto the object when they are true.
446 my ( $class, %args ) = @_;
450 if(my $setDescription = $args{setDescription}) {
451 $self->{setDescription} = $setDescription;
453 if(my $handler = $args{handler}) {
454 $self->{handler} = $handler;
# A method taking an optional handler and storing it when one is passed.
462 my ( $self, $handler ) = @_;
464 $self->{handler} = $handler if $handler;
# Emit the stored description as a setDescription element in the OAI 2.0
# namespace through the stored handler.
472 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
477 # __END__ C4::OAI::Description
# ListSets response, derived from HTTP::OAI::ListSets.
# NOTE(review): the sub header, the position counter handling and parts of
# the set and token construction are missing from this listing.
479 package C4::OAI::ListSets;
486 use base ("HTTP::OAI::ListSets");
# Takes the class name, the repository object and the request arguments.
489 my ( $class, $repository, %args ) = @_;
491 my $self = HTTP::OAI::ListSets->new(%args);
493 my $token = C4::OAI::ResumptionToken->new(%args);
494 my $sets = GetOAISets;
# Sets positioned before the token offset get separate handling (the body
# is not visible; presumably they are skipped).
496 foreach my $set (@$sets) {
497 if ($pos < $token->{offset}) {
# Wrap every description of the set in a C4::OAI::Description object.
502 foreach my $desc (@{$set->{'descriptions'}}) {
503 push @descriptions, C4::OAI::Description->new(
504 setDescription => $desc,
509 setSpec => $set->{'spec'},
510 setName => $set->{'name'},
511 setDescription => \@descriptions,
# Leave the loop once the position, counted from the offset and plus one,
# exceeds the repository maximum.
515 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached only when the position has moved past the
# offset the request started from.
518 $self->resumptionToken(
519 new C4::OAI::ResumptionToken(
520 metadataPrefix => $token->{metadata_prefix},
523 ) if ( $pos > $token->{offset} );
528 # __END__ C4::OAI::ListSets;
# ListRecords response, derived from HTTP::OAI::ListRecords.
# NOTE(review): the sub header, parts of the SQL text, the counter handling,
# the live/deleted test and several closing lines are missing from this
# listing.
530 package C4::OAI::ListRecords;
537 use base ("HTTP::OAI::ListRecords");
# Takes the class name, the repository object and the request arguments.
541 my ($class, $repository, %args) = @_;
543 my $self = HTTP::OAI::ListRecords->new(%args);
# The token object normalises from, until, offset and set from the request.
545 my $token = new C4::OAI::ResumptionToken( %args );
546 my $dbh = C4::Context->dbh;
# Resolve the set spec to a set row when the token carries one.
548 if(defined $token->{'set'}) {
549 $set = GetOAISetBySpec($token->{'set'});
# The query built below has two subselects, one on biblioitems (with its
# marcxml column) and one on deletedbiblio (with a null marcxml column), each
# optionally joined to oai_sets_biblios and filtered by set id, ordered by
# biblionumber. It asks for one row more than the maximum so that the loop
# can tell whether another page exists.
# NOTE(review): the offset is interpolated into the SQL text and can come
# from a client-supplied resumption token - validate it as an integer.
# NOTE(review): the first subselect compares the raw timestamp column while
# the second wraps it in DATE(), and the bind values differ as well (see the
# bind list) - confirm this asymmetry is intended.
551 my $max = $repository->{koha_max_count};
553 (SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
556 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
557 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
558 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
560 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
561 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
562 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
563 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
565 $sql .= ") ORDER BY biblionumber
566 LIMIT " . ($max + 1) . "
567 OFFSET $token->{offset}
569 my $sth = $dbh->prepare( $sql );
# Bind values follow the placeholder order: the stripped from_arg and
# until_arg for the first subselect, the unstripped from and until for the
# second, each followed by the set id when a set is in use.
570 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
571 push @bind_params, $set->{'id'} if defined $set;
572 push @bind_params, ($token->{'from'}, $token->{'until'});
573 push @bind_params, $set->{'id'} if defined $set;
574 $sth->execute( @bind_params );
# Once the counter exceeds the maximum a resumption token pointing at the
# next page (offset plus maximum) is attached. The counter update and the
# loop exit are not visible here.
# NOTE(review): no set argument is visible in the token built below; verify
# that the set is carried over to the next page.
577 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
579 if ( $count > $max ) {
580 $self->resumptionToken(
581 new C4::OAI::ResumptionToken(
582 metadataPrefix => $token->{metadata_prefix},
583 from => $token->{from},
584 until => $token->{until},
585 offset => $token->{offset} + $max,
# Collect the spec of every OAI set returned for this biblio.
591 my $oai_sets = GetOAISetsBiblio($biblionumber);
593 foreach (@$oai_sets) {
594 push @setSpecs, $_->{spec};
# A full record or a deleted record is added; the test that chooses between
# them is not visible (presumably whether marcxml is set - TODO confirm).
597 $self->record( C4::OAI::Record->new(
598 $repository, $marcxml, $timestamp, \@setSpecs,
599 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
600 metadataPrefix => $token->{metadata_prefix}
603 $self->record( C4::OAI::DeletedRecord->new(
604 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
608 # Return error if no results
610 return HTTP::OAI::Response->new(
611 requestURL => $repository->self_url(),
612 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
619 # __END__ C4::OAI::ListRecords
# Request dispatcher of the OAI-PMH server, derived from HTTP::OAI::Repository.
# NOTE(review): the sub headers, some use lines, the error test, the else
# keyword, the output generation call and parts of the default stylesheet
# path are missing from this listing.
623 package C4::OAI::Repository;
625 use base ("HTTP::OAI::Repository");
631 use HTTP::OAI::Repository qw/:validate/;
633 use XML::SAX::Writer;
636 use YAML::Syck qw( LoadFile );
637 use CGI qw/:standard -oldstyle_urls/;
# Constructor: reads settings, validates the current CGI request and builds
# the matching response object.
644 my ($class, %args) = @_;
645 my $self = $class->SUPER::new(%args);
# Archive identifier and page size come from system preferences; the default
# format list is oai_dc and marcxml.
647 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
648 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
649 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
650 $self->{ koha_stylesheet } = { }; # Build when needed
652 # Load configuration file if defined in OAI-PMH:ConfFile syspref
# The format list is then replaced by the keys of the format section of the
# loaded file.
653 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
654 $self->{ conf } = LoadFile( $file );
655 my @formats = keys %{ $self->{conf}->{format} };
656 $self->{ koha_metadata_format } = \@formats;
659 # Check for grammatical errors in the request
660 my @errs = validate_request( CGI::Vars() );
662 # Is metadataPrefix supported by the repository?
# An empty or missing prefix is not checked here.
663 my $mdp = param('metadataPrefix') || '';
664 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
665 push @errs, new HTTP::OAI::Error(
666 code => 'cannotDisseminateFormat',
667 message => "Dissemination as '$mdp' is not supported",
# Presumably taken when errors were collected (the test is not visible): a
# plain response carrying the request URL.
673 $response = HTTP::OAI::Response->new(
674 requestURL => self_url(),
# Otherwise the verb is removed from the request parameters and selects the
# response class; every class receives this repository object, and those
# that take arguments receive the remaining parameters.
679 my %attr = CGI::Vars();
680 my $verb = delete( $attr{verb} );
681 if ( $verb eq 'ListSets' ) {
682 $response = C4::OAI::ListSets->new($self, %attr);
684 elsif ( $verb eq 'Identify' ) {
685 $response = C4::OAI::Identify->new( $self );
687 elsif ( $verb eq 'ListMetadataFormats' ) {
688 $response = C4::OAI::ListMetadataFormats->new( $self );
690 elsif ( $verb eq 'GetRecord' ) {
691 $response = C4::OAI::GetRecord->new( $self, %attr );
693 elsif ( $verb eq 'ListRecords' ) {
694 $response = C4::OAI::ListRecords->new( $self, %attr );
696 elsif ( $verb eq 'ListIdentifiers' ) {
697 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
# Attach an XML::SAX::Writer bound to STDOUT as the handler of the response.
701 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
# Stylesheet lookup (called as stylesheet with a format name by
# C4::OAI::Record): returns the compiled stylesheet for the format, compiling
# it on first use and caching it in koha_stylesheet.
710 my ( $self, $format ) = @_;
712 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
713 unless ( $stylesheet ) {
# With a configuration file the path comes from the xsl_file entry of the
# format; otherwise it is assembled from the intrahtdocs setting and the
# marcflavour preference (the literal path parts are not visible here).
714 my $xsl_file = $self->{ conf }
715 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
716 : ( C4::Context->config('intrahtdocs') .
718 C4::Context->preference('marcflavour') .
720 my $parser = XML::LibXML->new();
721 my $xslt = XML::LibXSLT->new();
722 my $style_doc = $parser->parse_file( $xsl_file );
723 $stylesheet = $xslt->parse_stylesheet( $style_doc );
724 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
734 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
738 use C4::OAI::Repository;
740 my $repository = C4::OAI::Repository->new();
744 This object extends the HTTP::OAI::Repository object.
745 It accepts OAI-PMH HTTP requests and returns the result.
747 This OAI-PMH server can operate in a simple mode or an extended one.
749 In simple mode, repository configuration comes entirely from Koha system
750 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
751 records in marcxml or Dublin Core format. Dublin Core records are created from
752 Koha marcxml records transformed with XSLT. The XSL file used is located in the
753 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour,
754 respectively MARC21slim2OAIDC.xsl for MARC21 and UNIMARCslim2OAIDC.xsl for
757 In extended mode, it's possible to configure formats other than marcxml or Dublin
758 Core. A new syspref, OAI-PMH:ConfFile, specifies a YAML configuration file which
759 lists the available metadata formats and the XSL files used to create them from marcxml
760 records. If this syspref isn't set, the Koha OAI server works in simple mode. A
761 configuration file koha-oai.conf can look like this:
767 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
768 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
769 xsl_file: /usr/local/koha/xslt/vs.xsl
771 metadataPrefix: marcxml
772 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
773 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
775 metadataPrefix: oai_dc
776 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
777 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
778 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl