From 4997d59c83ec1c63a3dc2c089fed79e3ad5fa764 Mon Sep 17 00:00:00 2001
From: Nick Clemens
Date: Fri, 23 Jun 2023 13:08:31 +0000
Subject: [PATCH] Bug 33270: Add record_strip_nonxml routine to Koha::Biblio::Metadata

This adds a routine that can strip non xml characters from a record. It is
intended for cases where we do not wish to throw an exception, but rather need
to process a record to allow other work to continue

To test:
prove -v t/db_dependent/Koha/Biblio/Metadata.t

Signed-off-by: Sam Lau
Signed-off-by: Marcel de Rooy
Signed-off-by: Tomas Cohen Arazi
---
 Koha/Biblio/Metadata.pm               | 39 ++++++++++++++++++
 t/db_dependent/Koha/Biblio/Metadata.t | 59 ++++++++++++++++++++++++++-
 2 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/Koha/Biblio/Metadata.pm b/Koha/Biblio/Metadata.pm
index b6d532ea40..474049f75e 100644
--- a/Koha/Biblio/Metadata.pm
+++ b/Koha/Biblio/Metadata.pm
@@ -21,6 +21,7 @@ use MARC::File::XML;
 use Scalar::Util qw( blessed );
 
 use C4::Biblio qw( GetMarcFromKohaField );
+use C4::Charset qw( StripNonXmlChars );
 use C4::Items qw( GetMarcItem );
 use Koha::Database;
 use Koha::Exceptions::Metadata;
@@ -133,6 +134,44 @@ sub record {
     return $record;
 }
 
+=head3 record_strip_nonxml
+
+    my $record = $metadata->record_strip_nonxml;
+    my $record = $metadata->record_strip_nonxml( { embed_items => 1 } );
+
+This subroutine is intended for cases where we encounter a record that cannot be parsed, but want
+to make a good effort to present the record (for harvesting, deletion, editing) rather than throwing
+an exception.
+
+The metadata is run through StripNonXmlChars before it is parsed. Any $params are handed on to
+record() together with the parsed record; the hashref supplied by the caller is not modified.
+
+Returns nothing (undef in scalar context), after emitting a warning, if the record cannot be built
+
+=cut
+
+sub record_strip_nonxml {
+    my ( $self, $params ) = @_;
+
+    my $record;
+    eval {
+        $record = MARC::Record->new_from_xml(
+            StripNonXmlChars( $self->metadata ), 'UTF-8',
+            $self->schema
+        );
+    };
+    if ($@) {
+        # Best effort only: report the parser error and give up without dying
+        my $marcxml_error = $@;
+        chomp $marcxml_error;
+        warn $marcxml_error;
+        return;
+    }
+
+    # Hand the record on in a fresh hashref so the caller's $params is left untouched
+    return $self->record( { %{ $params || {} }, record => $record } );
+}
+
 =head2 Internal methods
 
 =head3 _embed_items
diff --git a/t/db_dependent/Koha/Biblio/Metadata.t b/t/db_dependent/Koha/Biblio/Metadata.t
index d6a5b8b94e..5318ebc23f 100755
--- a/t/db_dependent/Koha/Biblio/Metadata.t
+++ b/t/db_dependent/Koha/Biblio/Metadata.t
@@ -17,7 +17,7 @@
 
 use Modern::Perl;
 
-use Test::More tests => 3;
+use Test::More tests => 4;
 use Test::Exception;
 use Test::Warn;
 
@@ -89,6 +89,63 @@ subtest 'record() tests' => sub {
     $schema->storage->txn_rollback;
 };
 
+subtest 'record_strip_nonxml() tests' => sub {
+
+    plan tests => 6;
+
+    $schema->storage->txn_begin;
+
+    my $title = 'Oranges and' . chr(31) . ' Peaches';
+
+    # Create a record whose title holds a control character that is not allowed in XML
+    my $record = MARC::Record->new();
+    my $field  = MARC::Field->new( '245', '', '', 'a' => $title );
+    $record->append_fields($field);
+    my ($biblio_id) = C4::Biblio::AddBiblio( $record, '' );
+
+    my $metadata = Koha::Biblios->find($biblio_id)->metadata;
+    my $record2  = $metadata->record_strip_nonxml;
+
+    is( ref $record2, 'MARC::Record', 'Method record_strip_nonxml() returned a MARC::Record object' );
+    is(
+        $record2->field('245')->subfield("a"),
+        "Oranges and Peaches", 'Title in 245$a matches title with control character removed'
+    );
+
+    my $bad_data = $builder->build_object(
+        {
+            class => 'Koha::Biblio::Metadatas',
+            value => { format => 'marcxml', schema => 'MARC21', metadata => 'this_is_not_marcxml' }
+        }
+    );
+
+    warning_like { $record2 = $bad_data->record_strip_nonxml; }
+    qr/parser error : Start tag expected, '<' not found/,
+        'Warning thrown explicitly';
+
+    is(
+        $record2, undef,
+        "record_strip_nonxml returns undef when the record cannot be parsed after removing nonxml characters"
+    );
+
+    # Attach an item so that embed_items has something to embed
+    my $item = $builder->build_sample_item( { biblionumber => $metadata->biblionumber } );
+
+    # Emptied the OpacHiddenItems pref
+    t::lib::Mocks::mock_preference( 'OpacHiddenItems', '' );
+    my ($itemfield) = C4::Biblio::GetMarcFromKohaField('items.itemnumber');
+
+    my $params = { embed_items => 1 };
+    $record2 = $metadata->record_strip_nonxml($params);
+
+    my @items = $record2->field($itemfield);
+
+    is( scalar @items, 1, "We got back our item" );
+    is_deeply( $params, { embed_items => 1 }, "The params passed by the caller are not modified" );
+
+    $schema->storage->txn_rollback;
+};
+
 subtest '_embed_items' => sub {
 
     plan tests => 10;
-- 
2.39.5