3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
30 eval { require PerlIO::gzip };
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
105 $offset = $args{ offset } || 0;
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
145 my ($class, $repository) = @_;
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
255 package C4::OAI::DeletedRecord;
259 use HTTP::OAI::Metadata::OAI_DC;
261 use base ("HTTP::OAI::Record");
264 my ($class, $timestamp, $setSpecs, %args) = @_;
266 my $self = $class->SUPER::new(%args);
268 $timestamp =~ s/ /T/, $timestamp .= 'Z';
269 $self->header( new HTTP::OAI::Header(
271 identifier => $args{identifier},
272 datestamp => $timestamp,
275 foreach my $setSpec (@$setSpecs) {
276 $self->header->setSpec($setSpec);
282 # __END__ C4::OAI::DeletedRecord
286 package C4::OAI::GetRecord;
293 use base ("HTTP::OAI::GetRecord");
297 my ($class, $repository, %args) = @_;
299 my $self = HTTP::OAI::GetRecord->new(%args);
301 my $dbh = C4::Context->dbh;
302 my $prefix = $repository->{koha_identifier} . ':';
303 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
304 my ($marcxml, $timestamp);
306 unless ( ($marcxml, $timestamp) = $dbh->selectrow_array(q/
307 SELECT marcxml, timestamp
309 WHERE biblionumber=? /, undef, $biblionumber)) {
311 unless ( ($marcxml, $timestamp) = $dbh->selectrow_array(q/
312 SELECT biblionumber, timestamp
314 WHERE biblionumber=? /, undef, $biblionumber )) {
317 return HTTP::OAI::Response->new(
318 requestURL => $repository->self_url(),
319 errors => [ new HTTP::OAI::Error(
320 code => 'idDoesNotExist',
321 message => "There is no biblio record with this identifier",
328 my $oai_sets = GetOAISetsBiblio($biblionumber);
330 foreach (@$oai_sets) {
331 push @setSpecs, $_->{spec};
334 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
335 ($deleted == 1) ? $self->record( C4::OAI::DeletedRecord->new(
336 $timestamp, \@setSpecs, %args ) )
337 : $self->record( C4::OAI::Record->new(
338 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
342 # __END__ C4::OAI::GetRecord
346 package C4::OAI::ListIdentifiers;
353 use base ("HTTP::OAI::ListIdentifiers");
357 my ($class, $repository, %args) = @_;
359 my $self = HTTP::OAI::ListIdentifiers->new(%args);
361 my $token = new C4::OAI::ResumptionToken( %args );
362 my $dbh = C4::Context->dbh;
364 if(defined $token->{'set'}) {
365 $set = GetOAISetBySpec($token->{'set'});
367 my $max = $repository->{koha_max_count};
369 (SELECT biblioitems.biblionumber, biblioitems.timestamp
372 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
373 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
374 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
376 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
377 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
378 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
379 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
381 $sql .= ") ORDER BY biblionumber
382 LIMIT " . ($max+1) . "
383 OFFSET $token->{offset}
385 my $sth = $dbh->prepare( $sql );
386 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
387 push @bind_params, $set->{'id'} if defined $set;
388 push @bind_params, ($token->{'from'}, $token->{'until'});
389 push @bind_params, $set->{'id'} if defined $set;
390 $sth->execute( @bind_params );
393 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
395 if ( $count > $max ) {
396 $self->resumptionToken(
397 new C4::OAI::ResumptionToken(
398 metadataPrefix => $token->{metadata_prefix},
399 from => $token->{from},
400 until => $token->{until},
401 offset => $token->{offset} + $max,
407 $timestamp =~ s/ /T/, $timestamp .= 'Z';
408 $self->identifier( new HTTP::OAI::Header(
409 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
410 datestamp => $timestamp,
417 # __END__ C4::OAI::ListIdentifiers
419 package C4::OAI::Description;
424 use HTTP::OAI::SAXHandler qw/ :SAX /;
427 my ( $class, %args ) = @_;
431 if(my $setDescription = $args{setDescription}) {
432 $self->{setDescription} = $setDescription;
434 if(my $handler = $args{handler}) {
435 $self->{handler} = $handler;
443 my ( $self, $handler ) = @_;
445 $self->{handler} = $handler if $handler;
453 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
458 # __END__ C4::OAI::Description
460 package C4::OAI::ListSets;
467 use base ("HTTP::OAI::ListSets");
470 my ( $class, $repository, %args ) = @_;
472 my $self = HTTP::OAI::ListSets->new(%args);
474 my $token = C4::OAI::ResumptionToken->new(%args);
475 my $sets = GetOAISets;
477 foreach my $set (@$sets) {
478 if ($pos < $token->{offset}) {
483 foreach my $desc (@{$set->{'descriptions'}}) {
484 push @descriptions, C4::OAI::Description->new(
485 setDescription => $desc,
490 setSpec => $set->{'spec'},
491 setName => $set->{'name'},
492 setDescription => \@descriptions,
496 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
499 $self->resumptionToken(
500 new C4::OAI::ResumptionToken(
501 metadataPrefix => $token->{metadata_prefix},
504 ) if ( $pos > $token->{offset} );
509 # __END__ C4::OAI::ListSets;
511 package C4::OAI::ListRecords;
518 use base ("HTTP::OAI::ListRecords");
522 my ($class, $repository, %args) = @_;
524 my $self = HTTP::OAI::ListRecords->new(%args);
526 my $token = new C4::OAI::ResumptionToken( %args );
527 my $dbh = C4::Context->dbh;
529 if(defined $token->{'set'}) {
530 $set = GetOAISetBySpec($token->{'set'});
532 my $max = $repository->{koha_max_count};
534 (SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
537 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
538 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
539 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
541 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
542 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
543 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
544 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
546 $sql .= ") ORDER BY biblionumber
547 LIMIT " . ($max + 1) . "
548 OFFSET $token->{offset}
550 my $sth = $dbh->prepare( $sql );
551 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
552 push @bind_params, $set->{'id'} if defined $set;
553 push @bind_params, ($token->{'from'}, $token->{'until'});
554 push @bind_params, $set->{'id'} if defined $set;
555 $sth->execute( @bind_params );
558 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
560 if ( $count > $max ) {
561 $self->resumptionToken(
562 new C4::OAI::ResumptionToken(
563 metadataPrefix => $token->{metadata_prefix},
564 from => $token->{from},
565 until => $token->{until},
566 offset => $token->{offset} + $max,
572 my $oai_sets = GetOAISetsBiblio($biblionumber);
574 foreach (@$oai_sets) {
575 push @setSpecs, $_->{spec};
578 $self->record( C4::OAI::Record->new(
579 $repository, $marcxml, $timestamp, \@setSpecs,
580 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
581 metadataPrefix => $token->{metadata_prefix}
584 $self->record( C4::OAI::DeletedRecord->new(
585 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
592 # __END__ C4::OAI::ListRecords
596 package C4::OAI::Repository;
598 use base ("HTTP::OAI::Repository");
604 use HTTP::OAI::Repository qw/:validate/;
606 use XML::SAX::Writer;
609 use YAML::Syck qw( LoadFile );
610 use CGI qw/:standard -oldstyle_urls/;
617 my ($class, %args) = @_;
618 my $self = $class->SUPER::new(%args);
620 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
621 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
622 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
623 $self->{ koha_stylesheet } = { }; # Build when needed
625 # Load configuration file if defined in OAI-PMH:ConfFile syspref
626 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
627 $self->{ conf } = LoadFile( $file );
628 my @formats = keys %{ $self->{conf}->{format} };
629 $self->{ koha_metadata_format } = \@formats;
632 # Check for grammatical errors in the request
633 my @errs = validate_request( CGI::Vars() );
635 # Is metadataPrefix supported by the respository?
636 my $mdp = param('metadataPrefix') || '';
637 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
638 push @errs, new HTTP::OAI::Error(
639 code => 'cannotDisseminateFormat',
640 message => "Dissemination as '$mdp' is not supported",
646 $response = HTTP::OAI::Response->new(
647 requestURL => self_url(),
652 my %attr = CGI::Vars();
653 my $verb = delete( $attr{verb} );
654 if ( $verb eq 'ListSets' ) {
655 $response = C4::OAI::ListSets->new($self, %attr);
657 elsif ( $verb eq 'Identify' ) {
658 $response = C4::OAI::Identify->new( $self );
660 elsif ( $verb eq 'ListMetadataFormats' ) {
661 $response = C4::OAI::ListMetadataFormats->new( $self );
663 elsif ( $verb eq 'GetRecord' ) {
664 $response = C4::OAI::GetRecord->new( $self, %attr );
666 elsif ( $verb eq 'ListRecords' ) {
667 $response = C4::OAI::ListRecords->new( $self, %attr );
669 elsif ( $verb eq 'ListIdentifiers' ) {
670 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
674 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
683 my ( $self, $format ) = @_;
685 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
686 unless ( $stylesheet ) {
687 my $xsl_file = $self->{ conf }
688 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
689 : ( C4::Context->config('intrahtdocs') .
691 C4::Context->preference('marcflavour') .
693 my $parser = XML::LibXML->new();
694 my $xslt = XML::LibXSLT->new();
695 my $style_doc = $parser->parse_file( $xsl_file );
696 $stylesheet = $xslt->parse_stylesheet( $style_doc );
697 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
707 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
711 use C4::OAI::Repository;
713 my $repository = C4::OAI::Repository->new();
717 This object extend HTTP::OAI::Repository object.
718 It accepts OAI-PMH HTTP requests and returns result.
720 This OAI-PMH server can operate in a simple mode and extended one.
722 In simple mode, repository configuration comes entirely from Koha system
723 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
724 records in marcxml or dublin core format. Dublin core records are created from
725 koha marcxml records tranformed with XSLT. Used XSL file is located in
726 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
727 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
730 In extende mode, it's possible to parameter other format than marcxml or Dublin
731 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
732 list available metadata formats and XSL file used to create them from marcxml
733 records. If this syspref isn't set, Koha OAI server works in simple mode. A
734 configuration file koha-oai.conf can look like that:
740 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
741 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
742 xsl_file: /usr/local/koha/xslt/vs.xsl
744 metadataPrefix: marxml
745 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
746 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
748 metadataPrefix: oai_dc
749 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
750 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
751 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl