From b4f39e5c5877e55baff8cfe4dba02c9475a2547e Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 21 Mar 2008 14:37:55 -0500 Subject: [PATCH] do not let MARC::Batch open MARC files The version of MARC::Batch->new() distributed with version 2.0.0 of MARC::Record, if given a file name, will open it using the ':utf8' layer. This results in an incorrect character conversion when processing records in the MARC-8 character encoding. To avoid this, batch jobs that use MARC::Batch now open the file themselves, then pass the file handle to MARC::Batch->new(). Signed-off-by: Joshua Ferraro --- misc/batchCompareMARCvsFrameworks.pl | 5 ++++- misc/batchImportMARCWithBiblionumbers.pl | 5 ++++- misc/migration_tools/bulkauthimport.pl | 8 +++++--- misc/migration_tools/bulkmarcimport.pl | 9 +++++---- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/misc/batchCompareMARCvsFrameworks.pl b/misc/batchCompareMARCvsFrameworks.pl index 0b806f0a2a..e255602fd6 100755 --- a/misc/batchCompareMARCvsFrameworks.pl +++ b/misc/batchCompareMARCvsFrameworks.pl @@ -17,6 +17,8 @@ use MARC::Record; use MARC::Batch; use Getopt::Long; +use IO::File; + my ( $input_marc_file,$number,$nowarning,$frameworkcode) = ('',0); my $version; GetOptions( @@ -49,7 +51,8 @@ EOF die; }#/ -my $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +my $fh = IO::File->new($input_marc_file); # don't let MARC::Batch open the file, as it applies the ':utf8' IO layer +my $batch = MARC::Batch->new( 'USMARC', $fh ); $batch->warnings_off() unless $nowarning; $batch->strict_off() unless $nowarning; my $dbh=C4::Context->dbh; diff --git a/misc/batchImportMARCWithBiblionumbers.pl b/misc/batchImportMARCWithBiblionumbers.pl index 893d988a76..a1bf26eb74 100755 --- a/misc/batchImportMARCWithBiblionumbers.pl +++ b/misc/batchImportMARCWithBiblionumbers.pl @@ -19,6 +19,8 @@ use MARC::File::XML; use MARC::Batch; use Time::HiRes qw(gettimeofday); use Getopt::Long; +use IO::File; + my $input_marc_file = ''; my ($version); 
GetOptions( @@ -45,7 +47,8 @@ my $timeneeded; my $dbh = C4::Context->dbh; my $sth2=$dbh->prepare("update biblioitems set marc=? where biblionumber=?"); -my $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); +my $fh = IO::File->new($input_marc_file); # don't let MARC::Batch open the file, as it applies the ':utf8' IO layer +my $batch = MARC::Batch->new( 'USMARC', $fh ); $batch->warnings_off(); $batch->strict_off(); my ($tagfield,$biblionumtagsubfield) = &GetMarcFromKohaField("biblio.biblionumber",""); diff --git a/misc/migration_tools/bulkauthimport.pl b/misc/migration_tools/bulkauthimport.pl index 85233bac37..76aa6ebff6 100755 --- a/misc/migration_tools/bulkauthimport.pl +++ b/misc/migration_tools/bulkauthimport.pl @@ -19,8 +19,9 @@ use C4::Context; use C4::Charset; use C4::AuthoritiesMarc; use Time::HiRes qw(gettimeofday); - use Getopt::Long; +use IO::File; + my ( $input_marc_file, $number) = ('',0); my ($version, $delete, $test_parameter,$char_encoding, $verbose, $format, $commit); $| = 1; @@ -72,6 +73,7 @@ my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21'; $char_encoding = 'MARC21' unless ($char_encoding); print "CHAR : $char_encoding\n" if $verbose; my $starttime = gettimeofday; +my $fh = IO::File->new($input_marc_file); # don't let MARC::Batch open the file, as it applies the ':utf8' IO layer my $batch; if ($format =~ /XML/i) { # ugly hack follows -- MARC::File::XML, when used by MARC::Batch, @@ -84,9 +86,9 @@ if ($format =~ /XML/i) { # extract the records, not using regexes to look # for <record>.*</record>. 
$MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8'; - $batch = MARC::Batch->new( 'XML', $input_marc_file ); + $batch = MARC::Batch->new( 'XML', $fh ); } else { - $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); + $batch = MARC::Batch->new( 'USMARC', $fh ); } $batch->warnings_off(); $batch->strict_off(); diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl index d6b8d401b9..019167f4e9 100755 --- a/misc/migration_tools/bulkmarcimport.pl +++ b/misc/migration_tools/bulkmarcimport.pl @@ -24,9 +24,9 @@ use C4::Items; use Unicode::Normalize; use Time::HiRes qw(gettimeofday); use Getopt::Long; -binmode(STDOUT, ":utf8"); +use IO::File; -use Getopt::Long; +binmode(STDOUT, ":utf8"); my ( $input_marc_file, $number) = ('',0); my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format); @@ -107,6 +107,7 @@ my $marcFlavour = C4::Context->preference('marcflavour') || 'MARC21'; print "Characteristic MARC flavour: $marcFlavour\n" if $verbose; my $starttime = gettimeofday; my $batch; +my $fh = IO::File->new($input_marc_file); # don't let MARC::Batch open the file, as it applies the ':utf8' IO layer if ($format =~ /XML/i) { # ugly hack follows -- MARC::File::XML, when used by MARC::Batch, # appears to try to convert incoming XML records from MARC-8 @@ -118,9 +119,9 @@ if ($format =~ /XML/i) { # extract the records, not using regexes to look # for <record>.*</record>. $MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8'; - $batch = MARC::Batch->new( 'XML', $input_marc_file ); + $batch = MARC::Batch->new( 'XML', $fh ); } else { - $batch = MARC::Batch->new( 'USMARC', $input_marc_file ); + $batch = MARC::Batch->new( 'USMARC', $fh ); } $batch->warnings_off(); $batch->strict_off(); -- 2.39.5