Koha/misc/batchCompareMARCvsFrameworks.pl
Galen Charlton b4f39e5c58 do not let MARC::Batch open MARC files
The version of MARC::Batch->new() distributed with version
2.0.0 of MARC::Record, if given a file name, will
open it using the ':utf8' layer.  This results in an
incorrect character conversion when processing records
in the MARC-8 character encoding.

To avoid this, batch jobs that use MARC::Batch now
open the file themselves, then pass the file handle
to MARC::Batch->new().

Signed-off-by: Joshua Ferraro <jmf@liblime.com>
2008-03-21 21:46:39 -05:00

105 lines
3.2 KiB
Perl
Executable file

#!/usr/bin/perl
# Small script that compares an iso2709 file against Koha's MARC frameworks.
use strict;
BEGIN {
# find Koha's Perl modules
# test carefully before changing this
use FindBin;
eval { require "$FindBin::Bin/kohalib.pl" };
}
# Koha modules used
use C4::Context;
use MARC::File::USMARC;
use MARC::Record;
use MARC::Batch;
use Getopt::Long;
use IO::File;
# Command-line state filled in by GetOptions below.
my $input_marc_file = '';    # -file : path of the iso2709 file to analyse
my $number          = 0;     # -n    : parsed for compatibility; not used in the code visible here
my $nowarning;               # -w    : see help text below
my $frameworkcode;           # -c    : framework code used to select marc_subfield_structure rows
my $version;                 # -v    : show the help screen

GetOptions(
    'file:s' => \$input_marc_file,
    'n:s'    => \$number,
    'v'      => \$version,
    'w'      => \$nowarning,

    # -c must accept a value. Declared as a bare flag it stored 1 in
    # $frameworkcode and left the real framework code unparsed in @ARGV.
    'c:s' => \$frameworkcode,
);

# No -c (or an empty one) selects the default framework, whose code is "".
$frameworkcode = "" unless $frameworkcode;

# Show the help screen when asked for it (-v) or when no input file was given.
# The script then stops via die, so nothing below runs without an input file.
if ( $version || $input_marc_file eq '' ) {
    print <<EOF;
This script compares an iso2709 file and Koha's MARC frameworks
It will show the marc fields/subfields used in Koha, and that
are not in the iso2709 file and which fields/subfields that are
used in the iso2709 file and not in Koha.
parameters :
\tv : this version/help screen
\tfile /path/to/file/to/dump : the file to dump
\tw : warning and strict off. If your dump fails, try -w option. It it works, then, the file is iso2709, but a buggy one !
\tc : the frameworkcode. If omitted, set to ""
SAMPLE : ./compare_iso_and_marc_parameters.pl -file /home/paul/koha.dev/local/npl -n 1
EOF
    die;
}
# Open the input file ourselves instead of handing the name to MARC::Batch:
# given a file name, MARC::Batch applies the ':utf8' IO layer, which corrupts
# MARC-8 encoded records. The explicit 'r' mode makes IO::File use a 3-arg
# open, so a file name starting with '>' or '|' cannot change the open mode.
my $fh = IO::File->new( $input_marc_file, 'r' )
    or die "Unable to open $input_marc_file: $!\n";
my $batch = MARC::Batch->new( 'USMARC', $fh );

# -w means "warning and strict off" (see the help text). The original test
# was inverted (`unless $nowarning`), which disabled both checks by default
# and re-enabled them when -w was given.
if ($nowarning) {
    $batch->warnings_off();
    $batch->strict_off();
}

# Load every tag/subfield known to the selected framework.
my $dbh = C4::Context->dbh;
my $sth = $dbh->prepare("select tagfield,tagsubfield,tab from marc_subfield_structure where frameworkcode=?");
$sth->execute($frameworkcode);

# Both hashes are keyed by tag . subfield code and start at 1; the record
# loop increments an entry each time that subfield is met in the file, so
# (value - 1) is the number of occurrences.
#   %hash_unused : rows whose tab is -1
#   %hash_used   : every other row
my %hash_unused;
my %hash_used;
while ( my ( $tagfield, $tagsubfield, $tab ) = $sth->fetchrow ) {
    if ( defined $tab && $tab == -1 ) {
        $hash_unused{"$tagfield$tagsubfield"} = 1;
    }
    else {
        $hash_used{"$tagfield$tagsubfield"} = 1;
    }
}
# Walk every record of the batch and count, per tag/subfield, how often each
# subfield known to the framework occurs in the file. $i is the number of
# records read.
my $i = 0;
while ( my $record = $batch->next() ) {
    $i++;
    foreach my $MARCfield ( $record->fields() ) {

        # Skip tags below 010: per the MARC::Field documentation these are
        # control fields and carry no subfields. The original compared
        # against the literal 010, which Perl parses as octal (decimal 8),
        # so tag 009 was not skipped.
        next if $MARCfield->tag() < 10;

        foreach my $subfield ( $MARCfield->subfields() ) {

            # Element 0 of each subfield entry is appended to the tag to form
            # the same "tag . subfield" key used when the hashes were loaded.
            my $key = $MARCfield->tag() . $subfield->[0];

            # Only framework-known keys are counted; anything else is ignored.
            $hash_unused{$key}++ if exists $hash_unused{$key};
            $hash_used{$key}++   if exists $hash_used{$key};
        }
    }
}
# Report, part 1: entries of %hash_unused that were met at least once in the
# file, with their occurrence count (stored value minus the initial 1).
print "Undeclared tag/subfields that exists in the file\n";
print "================================================\n";
for my $unused_key ( sort keys %hash_unused ) {
    my $occurrences = $hash_unused{$unused_key} - 1;
    next if $occurrences == 0;
    print "$unused_key => $occurrences\n";
}

# Report, part 2: entries of %hash_used that never appeared in the file,
# i.e. whose counter still holds its initial value of 1.
print "Declared tag/subfields unused in the iso2709 file\n";
print "=================================================\n";
for my $used_key ( sort keys %hash_used ) {
    my $occurrences = $hash_used{$used_key} - 1;
    next unless $occurrences == 0;
    print "$used_key => $occurrences\n";
}

# Trailer: number of records read from the batch.
print "\n==================\n$i record parsed\n";