From c71468537b4fa589e91e7fd5823ddd4f54379e21 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Thu, 22 Jun 2023 11:22:18 +0000 Subject: [PATCH] Bug 33270: Attempt to recover from invalid metadata exception This patch uses the new record_strip_nonxml routine to attempt to display the record when it contains invalid characters Rather than silently strippg these, we warn in the logs, then add an 'about' container to the response. It is displayed nicely in the web view or sent as "INVALID_METADATA" in the xml response The 'error' codes for OAI seem to be at the request level, and the offered codes don't have a match for a bad record. Adding the about when we can recover seems the most generous response To test: Test plan, assumes using KTD default data - otherwise you need to find and import a record with encoding issues: 1 - Enable OAI-PMH system preference 2 - Browse to: http://localhost:8080/cgi-bin/koha/oai.pl?verb=ListRecords&resumptionToken=marcxml/350////0/0/352 3 - 500 error: Invalid data, cannot decode metadata object (biblio_metadata.id=368, biblionumber=369, format=marcxml, schema=MARC21, decoding_error=':8: parser error : PCDATA invalid Char value 31... 4 - Apply patch, restart all 5 - Reload the page 6 - It loads! 7 - Click 'Metadata' for record 369 - it succeeds! 8 - Check the logs - confirm you see a warning of the record problem 9 - Confirm 369 has an about section 10 - Check the individul 'GetRecord' response as well http://localhost:8080/cgi-bin/koha/oai.pl?verb=GetRecord&metadataPrefix=oai_dc&identifier=KOHA-OAI-TEST:369 Signed-off-by: Sam Lau Signed-off-by: Marcel de Rooy Signed-off-by: Tomas Cohen Arazi --- Koha/OAI/Server/GetRecord.pm | 3 +- Koha/OAI/Server/ListBase.pm | 30 +++++++++++------ Koha/OAI/Server/Repository.pm | 21 +++++++++--- koha-tmpl/opac-tmpl/xslt/OAI.xslt | 56 +++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 16 deletions(-) diff --git a/Koha/OAI/Server/GetRecord.pm b/Koha/OAI/Server/GetRecord.pm index 5e51b2fb99..aabfb3eb46 100644 --- a/Koha/OAI/Server/GetRecord.pm +++ b/Koha/OAI/Server/GetRecord.pm @@ -105,7 +105,8 @@ sub new { } else { # We fetch it using this method, rather than the database directly, # so it'll include the item data - my $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix}); + my ( $marcxml, $marcxml_error ) = $repository->get_biblio_marcxml( $biblionumber, $args{metadataPrefix} ); + $args{about} = [$marcxml_error] if $marcxml_error; $self->record( Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args) ); diff --git a/Koha/OAI/Server/ListBase.pm b/Koha/OAI/Server/ListBase.pm index 283f1b3334..6fd2891050 100644 --- a/Koha/OAI/Server/ListBase.pm +++ b/Koha/OAI/Server/ListBase.pm @@ -157,17 +157,25 @@ sub GetRecords { push @setSpecs, $_->{spec}; } if ( $metadata ) { - my $marcxml = !$deleted ? $repository->get_biblio_marcxml($biblionumber, $format) : undef; - if ( $marcxml ) { - $self->record( Koha::OAI::Server::Record->new( - $repository, $marcxml, $timestamp, \@setSpecs, - identifier => $repository->{ koha_identifier } . ':' . $biblionumber, - metadataPrefix => $token->{metadata_prefix} - ) ); + my ( $marcxml, $marcxml_error ); + ( $marcxml, $marcxml_error ) = $repository->get_biblio_marcxml( $biblionumber, $format ) if !$deleted; + my %params; + $params{identifier} = $repository->{koha_identifier} . ':' . $biblionumber; + $params{metadataPrefix} = $token->{metadata_prefix}; + $params{about} = [$marcxml_error] if $marcxml_error; + if ($marcxml) { + $self->record( + Koha::OAI::Server::Record->new( + $repository, $marcxml, $timestamp, \@setSpecs, + %params + ) + ); } else { - $self->record( Koha::OAI::Server::DeletedRecord->new( - $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber - ) ); + $self->record( + Koha::OAI::Server::DeletedRecord->new( + $timestamp, \@setSpecs, identifier => $repository->{koha_identifier} . ':' . $biblionumber + ) + ); } } else { $timestamp =~ s/ /T/; @@ -175,7 +183,7 @@ sub GetRecords { $self->identifier( HTTP::OAI::Header->new( identifier => $repository->{ koha_identifier} . ':' . $biblionumber, datestamp => $timestamp, - status => $deleted ? 'deleted' : undef + status => $deleted ? 'deleted' : undef, ) ); } } diff --git a/Koha/OAI/Server/Repository.pm b/Koha/OAI/Server/Repository.pm index 0cd0df5dcf..273497412f 100644 --- a/Koha/OAI/Server/Repository.pm +++ b/Koha/OAI/Server/Repository.pm @@ -37,9 +37,12 @@ use YAML::XS; use CGI qw/:standard -oldstyle_urls/; use C4::Context; use C4::Biblio qw( GetFrameworkCode ); +use C4::Charset qw( StripNonXmlChars ); use Koha::XSLT::Base; use Koha::Biblios; +use MARC::Record; + =head1 NAME Koha::OAI::Server::Repository - Handles OAI-PMH requests for a Koha database. @@ -178,8 +181,19 @@ sub get_biblio_marcxml { } my $biblio = Koha::Biblios->find($biblionumber); - my $record = $biblio->metadata->record({ embed_items => $with_items, opac => 1 }); - if ( $expanded_avs ) { + my $record; + + # Koha::Biblio::Metadata->record throws an exception on bad encoding, + # we don't want OAI harvests to die, so we catch and warn, and try to clean the record + eval { $record = $biblio->metadata->record( { embed_items => $with_items, opac => 1 } ); }; + my $decoding_error; + if ($@) { + my $exception = $@; + $exception->rethrow unless ( $exception->isa('Koha::Exceptions::Metadata::Invalid') ); + $decoding_error = "INVALID_METADATA"; + $record = $biblio->metadata->record_strip_nonxml( { embed_items => $with_items, opac => 1 } ); + } + if ( $record && $expanded_avs ) { my $frameworkcode = GetFrameworkCode($biblionumber) || ''; my $record_processor = Koha::RecordProcessor->new( { @@ -192,8 +206,7 @@ sub get_biblio_marcxml { ); $record_processor->process($record); } - - return $record ? $record->as_xml_record() : undef; + return ( $record ? $record->as_xml_record() : undef, $decoding_error ); } diff --git a/koha-tmpl/opac-tmpl/xslt/OAI.xslt b/koha-tmpl/opac-tmpl/xslt/OAI.xslt index d7956f31ed..e81eda8a31 100644 --- a/koha-tmpl/opac-tmpl/xslt/OAI.xslt +++ b/koha-tmpl/opac-tmpl/xslt/OAI.xslt @@ -331,6 +331,37 @@ + +
+ + #about +
+
+ About +
+
+
+
+ about +
+ +
+ + + + There was a problem decoding the metadata for this record, invalid characters were stripped. See system logs for details + + + + + + +
+
+
+
+
+
@@ -388,6 +419,31 @@ + +
+
+
+ About +
+
+
+ +
+ + + + There was a problem decoding the metadata for this record, invalid characters were stripped. See system logs for details + + + + + + +
+
+
+
+
-- 2.39.5