Browse Source
This patch adds: - a new maintenance script batch_sanitize_records - a new subroutine C4::Charset::SanitizeRecord - new unit tests for the new subroutine Test plan: 1/ prove t/db_dependent/Charset.t 2/ Create a record containing "&amp;" (it may be followed by as many 'amp;' as you want) in one of its fields, and do the same for the field linked to biblioitems.url. The url should not be sanitized, since it may legitimately contain "&". 3/ Launch the maintenance script with the -h parameter to see how to use it. 4/ Launch the script using the different parameters: --filename=FILENAME --biblionumbers='XXX' --auto-search The auto-search option sanitizes all records containing "&amp;" in the marcxml field. Use the verbose flag for testing. Without the --confirm flag, nothing is done. 5/ Use the --confirm flag and verify in the biblioitems.marcxml field that the record has been sanitized. 6/ Try the --reindex flag to reindex the records which have been modified. Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl> Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com> Signed-off-by: Tomas Cohen Arazi <tomascohen@gmail.com> (3.18.x)
3 changed files with 337 additions and 0 deletions
@ -0,0 +1,222 @@ |
|||
#!/usr/bin/perl |
|||
|
|||
# This file is part of Koha. |
|||
# |
|||
# Copyright 2014 BibLibre |
|||
# |
|||
# Koha is free software; you can redistribute it and/or modify it |
|||
# under the terms of the GNU General Public License as published by |
|||
# the Free Software Foundation; either version 3 of the License, or |
|||
# (at your option) any later version. |
|||
# |
|||
# Koha is distributed in the hope that it will be useful, but |
|||
# WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
# GNU General Public License for more details. |
|||
# |
|||
# You should have received a copy of the GNU General Public License |
|||
# along with Koha; if not, see <http://www.gnu.org/licenses>. |
|||
|
|||
use Modern::Perl; |
|||
use C4::Charset qw( SanitizeRecord ); |
|||
use C4::Context; |
|||
use DBI; |
|||
use C4::Biblio; |
|||
use Getopt::Long; |
|||
use Pod::Usage; |
|||
|
|||
# Command-line switches. Each variable stays undef unless its option is given.
my ( $help, $verbose, $confirm, $biblionumbers, $reindex, $filename,
    $auto_search );

# Parse @ARGV; on an unknown or malformed option, print the usage and exit 1.
my $result = GetOptions(
    'h|help'          => \$help,
    'v|verbose'       => \$verbose,
    'c|confirm'       => \$confirm,
    'biblionumbers:s' => \$biblionumbers,
    'reindex'         => \$reindex,
    'f|filename:s'    => \$filename,
    'auto-search'     => \$auto_search,
) || pod2usage(1);

pod2usage(0) if $help;

# The biblionumbers must come from exactly one source: a file, the
# --biblionumbers list, or the automatic search.
my $source_count = grep { $_ } ( $filename, $biblionumbers, $auto_search );

if ( $source_count == 0 ) {
    pod2usage(
        -exitval => 1,
        -message =>
          qq{\n\tAt least one record number source should be provided.\n}
    );
}
elsif ( $source_count > 1 ) {
    pod2usage(
        -exitval => 1,
        -message => qq{\n\tOnly one record number source should be provided.\n}
    );
}
|||
|
|||
my @biblionumbers;

# Collect the biblionumbers from the source selected on the command line:
# the automatic search (biblios_to_sanitize), a file, or the
# --biblionumbers list.
if ($auto_search) {
    @biblionumbers = biblios_to_sanitize();
}
elsif ($filename) {
    unless ( -e $filename ) {
        pod2usage(
            -exitval => 1,
            -message =>
qq{\n\tThis filename does not exist. Please verify the path is correct.\n}
        );
    }

    open( my $fh, '<', $filename ) || die("Can't open $filename ($!)");

    # A lexical line variable is used so the global $_ is left untouched.
    while ( my $line = <$fh> ) {
        chomp $line;

        # Split on any run of whitespace or commas, so that tab-separated
        # numbers are accepted as well as space- or comma-separated ones.
        push @biblionumbers, split( /[\s,]+/, $line );
    }
    close $fh;
}
else {
    @biblionumbers = split m|,|, $biblionumbers if $biblionumbers;
}

# We remove spaces
s/(^\s*|\s*$)//g for @biblionumbers;

# Remove empty entries (blank lines, doubled separators)
@biblionumbers = grep { !/^$/ } @biblionumbers;

say @biblionumbers . " records to process" if $verbose;
|||
|
|||
# Biblionumbers of the records that SanitizeRecord reported as modified.
my @changes;

for my $biblionumber (@biblionumbers) {
    print "processing record $biblionumber..." if $verbose;

    # Only plain integers are accepted as biblionumbers.
    unless ( $biblionumber =~ m|^\d+$| ) {
        say " skipping. ERROR: Invalid biblionumber." if $verbose;
        next;
    }

    my $record = C4::Biblio::GetMarcBiblio($biblionumber);
    unless ($record) {
        say " skipping. ERROR: Invalid record." if $verbose;
        next;
    }

    my ( $cleaned_record, $has_been_modified ) =
      C4::Charset::SanitizeRecord( $record, $biblionumber );

    if ($has_been_modified) {

        # The framework code is looked up by biblionumber, not by the
        # MARC::Record object.
        my $frameworkcode = C4::Biblio::GetFrameworkCode($biblionumber);

        # Without --confirm nothing is written; the record is only counted.
        C4::Biblio::ModBiblio( $cleaned_record, $biblionumber, $frameworkcode )
          if $confirm;

        push @changes, $biblionumber;
        say " Done!" if $verbose;
    }
    else {
        say " Nothing todo." if $verbose;
    }
}
|||
|
|||
# Summary: how many records were (or, without --confirm, would be) cleaned.
if ($verbose) {
    say "Total: "
      . @changes
      . " records "
      . ( $confirm ? "cleaned!" : "to clean." );
}

# Reindex only the records that were actually saved.
if ( $reindex and $confirm and @changes ) {
    say "Now, reindexing using -b -v" if $verbose;
    my $kohapath = C4::Context->config('intranetdir');

    # List-form system() bypasses the shell, so a path containing spaces or
    # shell metacharacters cannot break the command line.
    my @cmd = (
        "$kohapath/misc/migration_tools/rebuild_zebra.pl",
        '-b', '-v',
        '-where', 'biblionumber IN ( ' . join( ',', @changes ) . ' )',
    );

    # Report a failed reindex instead of silently ignoring the exit status.
    if ( system(@cmd) != 0 ) {
        my $reason =
            $? == -1
          ? "could not be run ($!)"
          : 'exited with status ' . ( $? >> 8 );
        warn "Reindexing failed: $cmd[0] $reason\n";
    }
}
|||
|
|||
# Return the list of biblionumbers whose biblioitems.marcxml contains the
# string "&amp;". Takes no arguments. Returns an empty list when nothing
# matches or when the query yields no result set.
sub biblios_to_sanitize {
    my $dbh   = C4::Context->dbh;
    my $query = q{
        SELECT biblionumber
        FROM biblioitems
        WHERE marcxml LIKE ?
    };

    # The pattern is passed as a bind value rather than as a double-quoted
    # SQL literal, which is not portable across SQL modes.
    my $biblionumbers = $dbh->selectcol_arrayref( $query, undef, '%&amp;%' );

    # selectcol_arrayref returns undef on failure when RaiseError is off.
    return @{ $biblionumbers // [] };
}
|||
|
|||
=head1 NAME |
|||
|
|||
batch_sanitize_records - This script sanitizes biblio records, replacing '&amp;amp;etc.' with '&' in them. |
|||
|
|||
=head1 SYNOPSIS |
|||
|
|||
batch_sanitize_records.pl [-h|--help] [-v|--verbose] [-c|--confirm] [--biblionumbers=BIBLIONUMBER_LIST] [-f|--filename=FILENAME] [--auto-search] [--reindex] |
|||
|
|||
Replace '&amp;' by '&' in a record. You can either give some biblionumbers, give a file containing biblionumbers, or ask for an auto-search. |
|||
|
|||
=head1 OPTIONS |
|||
|
|||
=over |
|||
|
|||
=item B<-h|--help> |
|||
|
|||
Print a brief help message |
|||
|
|||
=item B<-v|--verbose> |
|||
|
|||
Verbose mode. |
|||
|
|||
=item B<-c|--confirm> |
|||
|
|||
This flag must be provided in order for the script to actually |
|||
sanitize records. If it is not supplied, the script will |
|||
only report on the record list to process. |
|||
|
|||
=item B<--biblionumbers=BIBLIONUMBER_LIST> |
|||
|
|||
Give a biblionumber list using this parameter. They must be separated by comma. |
|||
|
|||
=item B<-f|--filename=FILENAME> |
|||
|
|||
Give a biblionumber list using a filename. Put one biblionumber per line, or separate them with a space or a comma. |
|||
|
|||
=item B<--auto-search> |
|||
|
|||
Automatically search records containing "&amp;" in biblioitems.marcxml or in the specified fields. |
|||
|
|||
=item B<--reindex> |
|||
|
|||
Reindex the modified records. |
|||
|
|||
=back |
|||
|
|||
=head1 AUTHOR |
|||
|
|||
Alex Arnaud <alex.arnaud@biblibre.com> |
|||
Christophe Croullebois <christophe.croullebois@biblibre.com> |
|||
Jonathan Druart <jonathan.druart@biblibre.com> |
|||
|
|||
=head1 COPYRIGHT |
|||
|
|||
Copyright 2014 BibLibre |
|||
|
|||
=head1 LICENSE |
|||
|
|||
This file is part of Koha. |
|||
|
|||
Koha is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software |
|||
Foundation; either version 3 of the License, or (at your option) any later version. |
|||
|
|||
You should have received a copy of the GNU General Public License along |
|||
with Koha; if not, write to the Free Software Foundation, Inc., |
|||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|||
|
|||
=cut |
@ -0,0 +1,51 @@ |
|||
use Modern::Perl; |
|||
use Test::More tests => 4; |
|||
use MARC::Record; |
|||
|
|||
use C4::Biblio qw( AddBiblio SetFieldMapping GetMarcFromKohaField ); |
|||
use C4::Context; |
|||
use C4::Charset qw( SanitizeRecord ); |
|||
|
|||
my $dbh = C4::Context->dbh;

# Run inside a transaction that is rolled back at the end, and make any
# database error fatal.
$dbh->{RaiseError} = 1;
$dbh->{AutoCommit} = 0;

my $frameworkcode = q||;

# Map biblioitems.url to 856$u for the framework used below.
$dbh->do(q|
    DELETE FROM marc_subfield_structure WHERE kohafield='biblioitems.url'
|);
$dbh->do(
    q|
    INSERT INTO marc_subfield_structure(frameworkcode,kohafield,tagfield,tagsubfield)
    VALUES (?, 'biblioitems.url', '856', 'u')
|, undef, $frameworkcode
);
my ( $url_field, $url_subfield ) =
  C4::Biblio::GetMarcFromKohaField( 'biblioitems.url', $frameworkcode );

my $url = q|http://www.example.org/index.pl?arg1=val1&arg2=val2|;

# Build a minimal record with the given title and the test url.
my $build_record = sub {
    my ($record_title) = @_;
    my $new_record = MARC::Record->new();
    $new_record->append_fields(
        MARC::Field->new( '100', ' ', ' ', a => 'my author' ),
        MARC::Field->new( '245', ' ', ' ', a => $record_title ),
        MARC::Field->new( $url_field, ' ', ' ', $url_subfield => $url ),
    );
    return $new_record;
};

# Case 1: the title contains only plain ampersands.
my $title  = q|My title & a word & another word|;
my $record = $build_record->($title);

my ( $biblionumber, $biblioitemnumber ) = AddBiblio( $record, $frameworkcode );
my ( $sanitized_record, $has_been_modified ) =
  C4::Charset::SanitizeRecord( $record, $biblionumber );
is( $has_been_modified, 0, 'SanitizeRecord: the record has not been modified' );

# is() takes ( got, expected ), so the value under test comes first.
is( $sanitized_record->subfield( $url_field, $url_subfield ),
    $url, 'SanitizeRecord: the url has not been modified' );

# Case 2: the title contains "&amp;" sequences.
$title  = q|My title &amp;amp; a word &amp; another word|;
$record = $build_record->($title);

( $biblionumber, $biblioitemnumber ) = AddBiblio( $record, $frameworkcode );
( $sanitized_record, $has_been_modified ) =
  C4::Charset::SanitizeRecord( $record, $biblionumber );
is( $has_been_modified, 1, 'SanitizeRecord: the record has been modified' );
is( $sanitized_record->subfield( $url_field, $url_subfield ),
    $url, 'SanitizeRecord: the url has not been modified' );

$dbh->rollback;
Loading…
Reference in new issue