From 4022ca8dd27f2e84ba1cd4d59daea9741f86eb96 Mon Sep 17 00:00:00 2001 From: Philippe Jaillon Date: Mon, 30 Jun 2008 16:07:36 +0200 Subject: [PATCH] OAI package handle correctly unicode content Signed-off-by: Joshua Ferraro --- C4/OAI/DC.pm | 3 +-- C4/OAI/DP.pm | 38 ++++++++++++++++++++++++++++++-------- opac/oai.pl | 37 ++++++++++++++++++++++++++++--------- 3 files changed, 59 insertions(+), 19 deletions(-) mode change 100644 => 100755 C4/OAI/DC.pm mode change 100644 => 100755 C4/OAI/DP.pm diff --git a/C4/OAI/DC.pm b/C4/OAI/DC.pm old mode 100644 new mode 100755 index 4c9eca50e7..e19dd5e3e9 --- a/C4/OAI/DC.pm +++ b/C4/OAI/DC.pm @@ -103,7 +103,6 @@ and the I keys set. package C4::OAI::DC; -use Encode; use C4::OAI::DP; use vars ('@ISA'); @ISA = ("C4::OAI::DP"); @@ -161,7 +160,7 @@ sub Archive_FormatRecord my $footer = "\n"; my $metadata = ''; - $metadata = $header . encode("utf8", decode( "iso-8859-1",$self->{'utility'}->FormatXML($dc))) . $footer if( $dc ); + $metadata = $header . $self->{'utility'}->FormatXML($dc) . $footer if( $dc ); $self->FormatRecord ($hashref->Identifier()->[0] , $hashref->DateStamp(), diff --git a/C4/OAI/DP.pm b/C4/OAI/DP.pm old mode 100644 new mode 100755 index d67a85410e..361c1fdb82 --- a/C4/OAI/DP.pm +++ b/C4/OAI/DP.pm @@ -10,6 +10,15 @@ # -----------------------------------------+-------------+------------- # Virginia Polytechnic Institute and State University | www.vt.edu # -------------------------------------------------------+------------- +# January 2008 +# ------------------+-------------------------------------------------- +# Ph. Jaillon | +# ------------------+----------------------+--------------------------- +# Department of Computer Science | +# -----------------------------------------+-------------+------------- +# Ecole Nationale Superieure des Mines de St-Etienne | www.emse.fr +# -------------------------------------------------------+------------- + $VERSION = '1.0.0'; @@ -41,6 +50,8 @@ use POSIX; use CGI; use C4::OAI::Utility; +# setting binmode to utf8 (any characters printed on STDOUT are utf8 encoded) +binmode(STDOUT, ":utf8"); # constructor sub new @@ -498,16 +509,27 @@ sub Identify }, { 'title' => 'VTOAI Perl Data Provider', - 'author' => { - 'name' => 'Hussein Suleman', - 'email' => 'hussein@vt.edu', - 'institution' => 'Virginia Tech', - 'mdorder' => [ qw ( name email institution ) ], - }, + 'author' => [ + { + 'name' => 'Hussein Suleman', + 'email' => 'hussein@vt.edu', + 'institution' => 'Virginia Tech', + 'mdorder' => [ qw ( name email institution ) ], + }, + { + 'name' => 'Philippe Jaillon', + 'email' => 'jaillon@emse.fr', + 'institution' => 'École Nationale Supérieure des Mines de Saint-Étienne', + 'mdorder' => [ qw ( name email institution ) ], + } + ], 'version' => '3.05', - 'URL' => 'http://www.dlib.vt.edu/projects/OAI/', + 'URL' => [ + 'http://www.dlib.vt.edu/projects/OAI/', + 'http://oai-pmh.emse.fr/' + ], 'mdorder' => [ qw ( title author version URL ) ] - } + }, ]] }; push (@{$identity->{'description'}}, $desc); diff --git a/opac/oai.pl b/opac/oai.pl index 73f352f4f7..44b7367f2a 100755 --- a/opac/oai.pl +++ b/opac/oai.pl @@ -30,7 +30,8 @@ use MARC::File::USMARC; sub new { # Get a MAR::Record as parameter and bless it as MARC::Record::KOHADC shift; - bless shift; + my $marc = shift; + bless $marc if( ref( $marc ) ); } sub subfield { @@ -61,6 +62,15 @@ my @result = (); \@result; } +sub XMLescape { +my ($t) = shift; + + foreach (@$t ) { + s/\&/\&/g; s/getfields('biblio.title'); + &XMLescape( $self->getfields('biblio.title') ); } sub Creator { my $self = shift; - $self->getfields('biblio.author'); + &XMLescape( $self->getfields('biblio.author') ); } sub Subject { my $self = shift; - $self->getfields('bibliosubject.subject'); + &XMLescape( $self->getfields('bibliosubject.subject') ); } sub DateStamp { @@ -125,12 +135,12 @@ sub Language { sub Type { my $self = shift; - $self->getfields('biblioitems.itemtype'); + &XMLescape( $self->getfields('biblioitems.itemtype') ); } sub Publisher { my $self = shift; - $self->getfields('biblioitems.publishercode'); + &XMLescape( $self->getfields('biblioitems.publishercode') ); } sub Set { @@ -285,8 +295,13 @@ sub Archive_GetRecord if( my $r = $sth->fetchrow_hashref() ) { my $marc = new MARC::Record::KOHADC( ::GetMarcBiblio( $identifier ) ); - $marc->{'biblio.timestamp'} = $r->{'timestamp'}; - return $marc ; + if( $marc ) { + $marc->{'biblio.timestamp'} = $r->{'timestamp'}; + return $marc ; + } + else { + warn("Archive_GetRecord : no MARC record for " . C4::Context->preference("OAI-PMH:archiveID") . ":" . $identifier); + } } $self->AddError ('idDoesNotExist', 'The value of the identifier argument is unknown or illegal in this repository'); @@ -316,11 +331,15 @@ sub Archive_ListRecords ($metadataPrefix, $offset, $from, $until ) = &parseResumptionToken($from, $until, $metadataPrefix, $resumptionToken); -warn( "Archive_ListRecords : $set, $from, $until, $metadataPrefix, $resumptionToken\n"); +#warn( "Archive_ListRecords : $set, $from, $until, $metadataPrefix, $resumptionToken\n"); $sth->execute( $from,$until,$self->{'MaxCount'}?$self->{'MaxCount'}:100000, $offset ); while( my $r = $sth->fetchrow_hashref() ) { my $marc = new MARC::Record::KOHADC( ::GetMarcBiblio( $r->{'biblionumber'} ) ); + unless( $marc ) { # somme time there is problems within koha, and we can't get valid marc record + warn("Archive_ListRecords : no MARC record for " . C4::Context->preference("OAI-PMH:archiveID") .":" . $r->{'biblionumber'} ); + next; + } $marc->{'biblio.timestamp'} = $r->{'timestamp'}; push( @allrows, $marc ); } -- 2.39.2