Koha/t/db_dependent/Charset.t
Jonathan Druart 299a8a6997 Bug 8218 : Add a maintenance script to sanitize biblio records
This patch adds:
- a new maintenance script batch_sanitize_records
- a new subroutine C4::Charset::SanitizeRecord
- new unit tests for the new subroutine

Test plan:
1/ prove t/db_dependent/Charset.t
2/ Create a record containing "&" (optionally followed by as many
'amp;' as you want) in one of its fields, and do the same for the field
linked to biblioitems.url.
The URL should not be sanitized, since it may legitimately contain "&".
3/ Launch the maintenance script with the -h parameter to see how to use
it.
4/ Launch the script using the different parameters:
 --filename=FILENAME
 --biblionumbers='XXX'
 --auto-search

The --auto-search option sanitizes all records containing "&" in
the marcxml field.

Use the verbose flag for testing.
Without the --confirm flag, nothing is done.

5/ Use the --confirm flag and verify in the biblioitems.marcxml field
that the record has been sanitized.

6/ Try the --reindex flag to reindex records which have been modified.

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@gmail.com>
2014-11-11 15:38:36 -03:00

51 lines
2 KiB
Perl

use Modern::Perl;
use Test::More tests => 4;
use MARC::Record;
use C4::Biblio qw( AddBiblio SetFieldMapping GetMarcFromKohaField );
use C4::Context;
use C4::Charset qw( SanitizeRecord );
# Database setup for the test.
# AutoCommit is switched off so the fixture rows written below stay inside an
# open transaction (the script ends with an explicit rollback), and RaiseError
# makes any failing SQL statement die instead of being silently ignored.
my $dbh = C4::Context->dbh;
$dbh->{RaiseError} = 1;
$dbh->{AutoCommit} = 0;

# Framework code used both for the mapping row below and for the records
# this test adds.
my $frameworkcode = q||;

# Remove every existing mapping for biblioitems.url, then insert a single
# known one (856$u) so the lookup below returns a predictable tag/subfield.
$dbh->do(q|
    DELETE FROM marc_subfield_structure WHERE kohafield='biblioitems.url'
|);

# The framework code is passed as a bind value rather than interpolated into
# the SQL text, so quoting is handled by the driver.
$dbh->do(
    q|
    INSERT INTO marc_subfield_structure(frameworkcode,kohafield,tagfield,tagsubfield)
    VALUES (?, 'biblioitems.url', '856', 'u')
    |,
    undef,
    $frameworkcode,
);

# Read the mapping back through the Koha API; the rest of the test builds its
# URL field from these two values.
my ( $url_field, $url_subfield ) = C4::Biblio::GetMarcFromKohaField('biblioitems.url', $frameworkcode);
# Case 1: the title contains only bare ampersands. The test expects
# SanitizeRecord to report the record as unmodified and to leave the URL
# subfield (which contains "&amp;") exactly as it was supplied.
my $title = q|My title & a word & another word|;
my $url = q|http://www.example.org/index.pl?arg1=val1&amp;arg2=val2|;
my $record = MARC::Record->new();
$record->append_fields(
    MARC::Field->new('100', ' ', ' ', a => 'my author'),
    MARC::Field->new('245', ' ', ' ', a => $title),
    MARC::Field->new($url_field, ' ', ' ', $url_subfield => $url ),
);
my ($biblionumber, $biblioitemnumber) = AddBiblio($record, $frameworkcode);
my ( $sanitized_record, $has_been_modified ) = C4::Charset::SanitizeRecord( $record, $biblionumber );
is( $has_been_modified, 0, 'SanitizeRecord: the record has not been modified' );

# Test::More's is() takes (got, expected, name). subfield() is forced into
# scalar context so exactly one value is passed as "got": in list context a
# missing or repeated subfield would shift or extend is()'s argument list.
is(
    scalar $sanitized_record->subfield($url_field, $url_subfield),
    $url,
    'SanitizeRecord: the url has not been modified'
);
# Case 2: the title now contains repeatedly-escaped ampersands
# ("&amp;amp;amp;" and "&amp;amp;"). The test expects SanitizeRecord to report
# the record as modified while still leaving the URL subfield untouched.
$title = q|My title &amp;amp;amp; a word &amp;amp; another word|;
$record = MARC::Record->new();
$record->append_fields(
    MARC::Field->new('100', ' ', ' ', a => 'my author'),
    MARC::Field->new('245', ' ', ' ', a => $title),
    MARC::Field->new($url_field, ' ', ' ', $url_subfield => $url ),
);
($biblionumber, $biblioitemnumber) = AddBiblio($record, $frameworkcode);
( $sanitized_record, $has_been_modified ) = C4::Charset::SanitizeRecord( $record, $biblionumber );
is( $has_been_modified, 1, 'SanitizeRecord: the record has been modified' );

# Test::More's is() takes (got, expected, name). subfield() is forced into
# scalar context so exactly one value is passed as "got": in list context a
# missing or repeated subfield would shift or extend is()'s argument list.
is(
    scalar $sanitized_record->subfield($url_field, $url_subfield),
    $url,
    'SanitizeRecord: the url has not been modified'
);

# Discard everything written during the test (AutoCommit is off, so nothing
# has been committed).
$dbh->rollback;