Bug 33270: Attempt to recover from invalid metadata exception

This patch uses the new record_strip_nonxml routine to attempt to display
the record when it contains invalid characters

Rather than silently strippg these, we warn in the logs, then add an 'about'
container to the response. It is displayed nicely in the web view or sent as "INVALID_METADATA" in
the xml response

The 'error' codes for OAI seem to be at the request level, and the offered codes don't have a match
for a bad record. Adding the about when we can recover seems the most generous response

To test:
Test plan, assumes using KTD default data - otherwise you need to find and import a record with encoding issues:
 1 - Enable OAI-PMH system preference
 2 - Browse to:
    http://localhost:8080/cgi-bin/koha/oai.pl?verb=ListRecords&resumptionToken=marcxml/350////0/0/352
 3 - 500 error:
    Invalid data, cannot decode metadata object (biblio_metadata.id=368, biblionumber=369, format=marcxml, schema=MARC21, decoding_error=':8: parser error : PCDATA invalid Char value 31...
 4 - Apply patch, restart all
 5 - Reload the page
 6 - It loads!
 7 - Click 'Metadata' for record 369 - it succeeds!
 8 - Check the logs - confirm you see a warning of the record problem
 9 - Confirm 369 has an about section
10 - Check the individul 'GetRecord' response as well
    http://localhost:8080/cgi-bin/koha/oai.pl?verb=GetRecord&metadataPrefix=oai_dc&identifier=KOHA-OAI-TEST:369

Signed-off-by: Sam Lau <samalau@gmail.com>

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
This commit is contained in:
Nick Clemens 2023-06-22 11:22:18 +00:00 committed by Tomas Cohen Arazi
parent 4997d59c83
commit c71468537b
Signed by: tomascohen
GPG key ID: 0A272EA1B2F3C15F
4 changed files with 94 additions and 16 deletions

View file

@ -105,7 +105,8 @@ sub new {
} else {
# We fetch it using this method, rather than the database directly,
# so it'll include the item data
my $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix});
my ( $marcxml, $marcxml_error ) = $repository->get_biblio_marcxml( $biblionumber, $args{metadataPrefix} );
$args{about} = [$marcxml_error] if $marcxml_error;
$self->record(
Koha::OAI::Server::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args)
);

View file

@ -157,17 +157,25 @@ sub GetRecords {
push @setSpecs, $_->{spec};
}
if ( $metadata ) {
my $marcxml = !$deleted ? $repository->get_biblio_marcxml($biblionumber, $format) : undef;
if ( $marcxml ) {
$self->record( Koha::OAI::Server::Record->new(
$repository, $marcxml, $timestamp, \@setSpecs,
identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
metadataPrefix => $token->{metadata_prefix}
) );
my ( $marcxml, $marcxml_error );
( $marcxml, $marcxml_error ) = $repository->get_biblio_marcxml( $biblionumber, $format ) if !$deleted;
my %params;
$params{identifier} = $repository->{koha_identifier} . ':' . $biblionumber;
$params{metadataPrefix} = $token->{metadata_prefix};
$params{about} = [$marcxml_error] if $marcxml_error;
if ($marcxml) {
$self->record(
Koha::OAI::Server::Record->new(
$repository, $marcxml, $timestamp, \@setSpecs,
%params
)
);
} else {
$self->record( Koha::OAI::Server::DeletedRecord->new(
$timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber
) );
$self->record(
Koha::OAI::Server::DeletedRecord->new(
$timestamp, \@setSpecs, identifier => $repository->{koha_identifier} . ':' . $biblionumber
)
);
}
} else {
$timestamp =~ s/ /T/;
@ -175,7 +183,7 @@ sub GetRecords {
$self->identifier( HTTP::OAI::Header->new(
identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
datestamp => $timestamp,
status => $deleted ? 'deleted' : undef
status => $deleted ? 'deleted' : undef,
) );
}
}

View file

@ -37,9 +37,12 @@ use YAML::XS;
use CGI qw/:standard -oldstyle_urls/;
use C4::Context;
use C4::Biblio qw( GetFrameworkCode );
use C4::Charset qw( StripNonXmlChars );
use Koha::XSLT::Base;
use Koha::Biblios;
use MARC::Record;
=head1 NAME
Koha::OAI::Server::Repository - Handles OAI-PMH requests for a Koha database.
@ -178,8 +181,19 @@ sub get_biblio_marcxml {
}
my $biblio = Koha::Biblios->find($biblionumber);
my $record = $biblio->metadata->record({ embed_items => $with_items, opac => 1 });
if ( $expanded_avs ) {
my $record;
# Koha::Biblio::Metadata->record throws an exception on bad encoding,
# we don't want OAI harvests to die, so we catch and warn, and try to clean the record
eval { $record = $biblio->metadata->record( { embed_items => $with_items, opac => 1 } ); };
my $decoding_error;
if ($@) {
my $exception = $@;
$exception->rethrow unless ( $exception->isa('Koha::Exceptions::Metadata::Invalid') );
$decoding_error = "INVALID_METADATA";
$record = $biblio->metadata->record_strip_nonxml( { embed_items => $with_items, opac => 1 } );
}
if ( $record && $expanded_avs ) {
my $frameworkcode = GetFrameworkCode($biblionumber) || '';
my $record_processor = Koha::RecordProcessor->new(
{
@ -192,8 +206,7 @@ sub get_biblio_marcxml {
);
$record_processor->process($record);
}
return $record ? $record->as_xml_record() : undef;
return ( $record ? $record->as_xml_record() : undef, $decoding_error );
}

View file

@ -331,6 +331,37 @@
</div>
</div>
</div>
<xsl:if test="oai:about">
<div class="panel panel-success">
<a data-toggle="collapse">
<xsl:attribute name="href">#about<xsl:value-of select="translate(oai:header/oai:identifier/text(), ':/.', '')"></xsl:value-of></xsl:attribute>
<div class="panel-heading">
<h5 class="panel-title">
About
</h5>
</div>
</a>
<div class="panel-collapse collapse">
<xsl:attribute name="id">about<xsl:value-of select="translate(oai:header/oai:identifier/text(), ':/.', '')"></xsl:value-of></xsl:attribute>
<div class="panel-body list-group">
<xsl:for-each select="oai:about">
<div class="list-group-item">
<span>
<xsl:choose>
<xsl:when test="text()='INVALID_METADATA'">
There was a problem decoding the metadata for this record, invalid characters were stripped. See system logs for details
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="text()" />
</xsl:otherwise>
</xsl:choose>
</span>
</div>
</xsl:for-each>
</div>
</div>
</div>
</xsl:if>
</div>
</div>
</xsl:for-each>
@ -388,6 +419,31 @@
<xsl:apply-templates select="oai:metadata/*" />
</div>
</div>
<xsl:if test="oai:about">
<div class="panel panel-info">
<div class="panel-heading">
<h5 class="panel-title">
About
</h5>
</div>
<div class="panel-body list-group">
<xsl:for-each select="oai:about">
<div class="list-group-item">
<span>
<xsl:choose>
<xsl:when test="text()='INVALID_METADATA'">
There was a problem decoding the metadata for this record, invalid characters were stripped. See system logs for details
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="text()" />
</xsl:otherwise>
</xsl:choose>
</span>
</div>
</xsl:for-each>
</div>
</div>
</xsl:if>
</div>
</div>
</xsl:for-each>