From 0306281e417ffe8f3f7e3ff033f58a0872ce980d Mon Sep 17 00:00:00 2001
From: Nick Clemens
Date: Thu, 25 Jul 2024 16:37:17 +0000
Subject: [PATCH] Bug 37478: Add strict mode to bulkmarcimport

This patch adds a conversion from MARC -> XML -> MARC to catch any
parsing errors. If errors are found, we then lint the record to catch
any problems, output the warnings, and skip the record.

To test:
1 - Download the sample records from this bug report
2 - perl misc/migration_tools/bulkmarcimport.pl -b --file=520_nosubfield.mrc -v
    fails!
3 - perl misc/migration_tools/bulkmarcimport.pl -b --file=003_subfielda.mrc -v
    fails!
4 - Apply patch
5 - Repeat 2 & 3 - no change
6 - Add -st switch to the commands:
    perl misc/migration_tools/bulkmarcimport.pl -b --file=520_nosubfield.mrc -v -st
    perl misc/migration_tools/bulkmarcimport.pl -b --file=003_subfielda.mrc -v -st
7 - The records are now skipped, and the script completes
8 - Confirm the warnings generated are useful
9 - Sign off!

Signed-off-by: David Nind
Signed-off-by: Martin Renvoize
Signed-off-by: Katrin Fischer
---
 misc/migration_tools/bulkmarcimport.pl | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl
index 2dd83578a2..5bfe991145 100755
--- a/misc/migration_tools/bulkmarcimport.pl
+++ b/misc/migration_tools/bulkmarcimport.pl
@@ -9,6 +9,7 @@ use Modern::Perl;
 use MARC::File::USMARC;
 use MARC::File::XML;
 use MARC::Batch;
+use MARC::Lint;
 use Encode;
 
 use Koha::Script;
@@ -76,6 +77,7 @@ my $localcust;
 my $marc_mod_template    = '';
 my $marc_mod_template_id = -1;
 my $skip_indexing        = 0;
+my $strict_mode;
 $| = 1;
 
 GetOptions(
@@ -113,6 +115,7 @@ GetOptions(
     'custom:s'          => \$localcust,
     'marcmodtemplate:s' => \$marc_mod_template,
     'si|skip_indexing'  => \$skip_indexing,
+    'st|strict'         => \$strict_mode,
 );
 
 $biblios ||= !$authorities;
@@ -320,6 +323,7 @@ my $record_number = 0;
 my $logger        = Koha::Logger->get;
 my $schema        = Koha::Database->schema;
 my $marc_records  = [];
+my $lint          = MARC::Lint->new;
 RECORD: while () {
     my $record;
     $record_number++;
@@ -339,6 +343,23 @@ RECORD: while () {
     }
     if ($record) {
 
+        if ($strict_mode) {
+            # Strict mode: round-trip the record MARC -> XML -> MARC so that
+            # any parsing error surfaces here and the record can be skipped.
+            my $xml = $record->as_xml_record();
+            eval { MARC::Record::new_from_xml( $xml, 'UTF-8', "MARC21" ); };
+            if ($@) {
+                print "Record $record_number generated invalid xml:\n";
+                # Lint the record so the operator is told what is wrong with
+                # it, not only that the round-trip failed.
+                $lint->check_record($record);
+                foreach my $warning ( $lint->warnings ) {
+                    print " " . $warning . "\n";
+                }
+                print " Record skipped!\n";
+                next RECORD;
+            }
+        }
         # transcode the record to UTF8 if needed & applicable.
         if ( $record->encoding() eq 'MARC-8' and not $skip_marc8_conversion ) {
             my ( $guessed_charset, $charset_errors );
-- 
2.39.5