From 706f472642d988da2c0a93b5530208d7cc34374b Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Thu, 25 Aug 2016 14:54:09 +0100 Subject: [PATCH] Bug 10455: Remove MARC21_utf8_flag_fix.pl Is this script still in use? It uses the biblioitems.marc field so if it's still useful it will need to be rewritten. Signed-off-by: Mason James Signed-off-by: Nick Clemens Signed-off-by: Kyle M Hall --- misc/maintenance/MARC21_utf8_flag_fix.pl | 219 ----------------------- 1 file changed, 219 deletions(-) delete mode 100755 misc/maintenance/MARC21_utf8_flag_fix.pl diff --git a/misc/maintenance/MARC21_utf8_flag_fix.pl b/misc/maintenance/MARC21_utf8_flag_fix.pl deleted file mode 100755 index 17b885d9f8..0000000000 --- a/misc/maintenance/MARC21_utf8_flag_fix.pl +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2009 Liblime -# -# This file is part of Koha. -# -# Koha is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# Koha is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Koha; if not, see . - -use strict; -use warnings; - -use MARC::Record; -use MARC::File::XML; -use Getopt::Long qw(:config auto_help auto_version); -use Pod::Usage; - -use C4::Biblio; -use C4::Charset; -use C4::Context; -use C4::Debug; - -BEGIN { - # find Koha's Perl modules - # test carefully before changing this - use FindBin; - eval { require "$FindBin::Bin/../kohalib.pl" }; -} - -our $debug; - -## OPTIONS -my $help = 0; -my $man = 0; -my $verbose = 0; - -my $limit; # undef, not zero. -my $offset = 0; -my $dump = 0; -my $summary = 1; -my $fix = 0; - -GetOptions( - 'help|?' => \$help, - 'man' => \$man, - 'verbose=i' => \$verbose, - 'limit=i' => \$limit, - 'offset=i' => \$offset, - 'dump!' => \$dump, - 'summary!' => \$summary, - 'fix!' => \$fix, -) or pod2usage(2); -pod2usage( -verbose => 2 ) if ($man); -pod2usage( -verbose => 2 ) if ($help and $verbose); -pod2usage(1) if $help; - -if ($debug) { - $summary++; - $verbose++; -} - -my $marcflavour = C4::Context->preference('marcflavour') or die "No marcflavour (MARC21 or UNIMARC) set in syspref"; -($marcflavour eq 'MARC21') or die "marcflavour must be MARC21, not $marcflavour"; - -my $all = C4::Context->dbh->prepare("SELECT COUNT(*) FROM biblioitems"); -$all->execute; -my $total = $all->fetchrow; - -my $count_query = "SELECT COUNT(*) FROM biblioitems WHERE substr(marc, 10, 1) = ?"; -my $query = "SELECT * FROM biblioitems WHERE substr(marc, 10, 1) <> ?"; - -my $sth = C4::Context->dbh->prepare($count_query); -$sth->execute('a'); -my $count = $sth->fetchrow; -my $badcount = $total-$count; - -if ($summary) { - print "# biblioitems with leader/09 = 'a'\n"; - printf "# %9s match\n", $count; - printf "# %9s BAD \n", $badcount; - printf "# %9s total\n\n", $total; - printf "# Examining %s BAD record(s), offset %d:\n", ($limit || 'all'), $offset; -} - -my $bad_recs = C4::Context->dbh->prepare($query); -$bad_recs->execute('a'); -$limit or $limit = $bad_recs->rows(); # limit becomes max if unspecified -$limit += $offset if $offset; # increase limit for offset -my $i = 0; - -MARC::File::XML->default_record_format($marcflavour) or die "FAILED MARC::File::XML->default_record_format($marcflavour)"; - -while ( my $row = $bad_recs->fetchrow_hashref() ) { - (++$i > $limit) and last; - ( $i > $offset) or next; - my $xml = $row->{marcxml}; - $xml =~ s/.*(\)/$1/s; - $xml =~ s/(\<\/leader\>).*/$1/s; - # $xml now pared down to just the element - printf "# %4d of %4d: biblionumber %s : %s\n", $i, $badcount, $row->{biblionumber}, $xml; - my $stripped = StripNonXmlChars($row->{marcxml}); - ($stripped eq $row->{marcxml}) or printf STDERR "%d NON-XML Characters removed!!\n", (length($row->{marcxml}) - length($stripped)); - my $record = eval { MARC::Record::new_from_xml( $stripped, 'utf8', $marcflavour ) }; - if ($@ or not $record) { - print STDERR "ERROR in MARC::Record::new_from_xml(\$marcxml, 'utf8', $marcflavour): $@\n\tSkipping $row->{biblionumber}\n"; - next; - } - if ($fix) { - SetMarcUnicodeFlag($record, $marcflavour); - if (ModBiblioMarc($record, $row->{biblionumber})) { - printf "# %4d of %4d: biblionumber %s : %s\n", $i, $badcount, $row->{biblionumber}, $record->leader(); - } else { - print STDERR "ERROR in ModBiblioMarc(\$record, $row->{biblionumber})\n"; - } - } - $dump and print $row->{marcxml}, "\n"; -} - -__END__ - -=head1 NAME - -MARC21_utf8_flag_fix.pl - Repair missing leader position 9 value ("a" for MARC21 - UTF8). - -=head1 SYNOPSIS - -MARC21_utf8_flag_fix.pl [ -h | -m ] [ -v ] [ -d ] [ -s ] [ -l N ] [ -o N ] [ -f ] - - Help Options: - -h --help -? Brief help message - -m --man Full documentation, same as --help --verbose - --version Prints version info - - Feedback Options: - -d --dump Dump MARCXML of biblioitems processed, default OFF - -s --summary Print initial summary of good and bad biblioitems counted, default ON - -v --verbose Increase verbosity of output, default OFF - - Run Options: - -f --fix Save repaired leaders to biblioitems.marcxml, - -l --limit Number of biblioitems to display or fix - -o --offset Number of biblioitems to skip (not displayed or fixed) - -=head1 OPTIONS - -=over 8 - -=item B<--fix> - -This is the most important option. Without it, the script just tells you about the problem records. -With --fix, the script fixes the same records. - -=item B<--limit=N> - -Like a LIMIT statement in SQL, this constrains the number of records targeted by the script to an integer N. -The default is to target all records with bad leaders. - -=item B<--offset=N> - -Like an OFFSET statement in SQL, this tells the script to skip N of the targeted records. -The default is 0, i.e. skip none of them. - -=back - -The binary ON/OFF options can be negated like: - B<--nosummary> Do not display summary. - B<--nodump> Do not dump MARCXML. - B<--nofix> Do not change any records. This is the default mode. - -=head1 DESCRIPTION - -Koha expects to have all MARXML records internalized in UTF-8 encoding. This -presents a problem when records have been inserted with the leader/09 showing -blank for MARC8 encoding. This script is used to determine the extent of the -problem and to fix the affected leaders. - -As the name suggests, this script is only useful for MARC21 and will die for marcflavour UNIMARC. - -Run MARC21_utf8_flag_fix.pl the first time with no options, and assuming you agree that the leaders -presented need fixing, run it again with B<--fix>. - -=head1 USAGE EXAMPLES - -B - -In the most basic form, displays summary of biblioitems examined -and the leader from any found without /09 = a. - -B - -Fixes the same biblioitems, displaying summary and each leader before/after change. - -B - -Dumps MARCXML from the 16th, 17th and 18th bad records found. - -B - -Same thing as previous example in terse form. - -=head1 TO DO - -Allow biblionumbers to be piped into STDIN as the selection mechanism. - -=head1 SEE ALSO - -C4::Biblio - -=cut -- 2.39.5