From 39b17d0526081d32d07039d067018ec374614df2 Mon Sep 17 00:00:00 2001 From: Aleisha Amohia Date: Fri, 25 Mar 2022 03:12:31 +0000 Subject: [PATCH] Bug 30358: Strip leading/trailing whitespace characters from input fields when cataloguing This enhancement adds a system preference StripWhitespaceChars which, when enabled, will strip leading and trailing whitespace characters from all fields when cataloguing both bibliographic records and authority records. Whitespace characters that will be stripped are: - spaces - newlines - carriage returns - tabs To test: 1. Apply patch and install database updates 2. Go to Administration, system preferences, find the new StripWhitespaceChars preference. It should be "Don't strip" by default. Change it to "Strip". 3. Search for a biblio record and edit it. Put some leading or trailing whitespace characters in input fields and textarea fields and save. 4. Confirm these characters are removed when you save the record. 5. Repeat steps 3 and 4 for authority records. 6. Confirm tests pass t/db_dependent/Biblio/ModBiblioMarc.t Sponsored-by: Educational Services Australia SCIS Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Also strip inner newlines This patch amends the StripWhitespaceChars system preference to also strip inner newlines (line breaks and carriage returns) when enabled. Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Inner newlines should be replaced with a space Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Fixing tests and including for inner newlines Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Clarify syspref wording about fields affected Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Consider field has multiple subfields of same key To test: 1) Click the clone subfield button to make multiple subfields with the same key, i.e. 500$a$a$a 2) Save the record and confirm that the fields contain the correct data after whitespaces are stripped. Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Put multiple subfields fix on auth side Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) stripWhitespaceChars subroutine and tests To test: Confirm test plan above still works as expected and tests pass in t/Koha_MetadataRecord.t Sponsored-by: Catalyst IT Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Fixing ModBiblioMarc.t tests Signed-off-by: David Nind Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Do not strip whitespace from control fields Signed-off-by: Kyle M Hall Bug 30358: (follow-up) Simplify regex The regex does the following: 1. Replace newlines and carriage returns with a space 2. Replace leading and trailing whitespace with nothing (strip) Signed-off-by: Hammat Wele Signed-off-by: Kyle M Hall Signed-off-by: Tomas Cohen Arazi --- C4/AuthoritiesMarc.pm | 6 +++- C4/Biblio.pm | 5 +++ Koha/MetadataRecord.pm | 27 ++++++++++++++ ...0358_-_add_StripWhitespaceChars_syspref.pl | 12 +++++++ installer/data/mysql/mandatory/sysprefs.sql | 1 + .../admin/preferences/cataloguing.pref | 6 ++++ t/Koha_MetadataRecord.t | 23 +++++++++++- t/db_dependent/Biblio/ModBiblioMarc.t | 36 ++++++++++++++++++- 8 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 installer/data/mysql/atomicupdate/bug_30358_-_add_StripWhitespaceChars_syspref.pl diff --git a/C4/AuthoritiesMarc.pm b/C4/AuthoritiesMarc.pm index 02f9103de3..d21e7c059c 100644 --- a/C4/AuthoritiesMarc.pm +++ b/C4/AuthoritiesMarc.pm @@ -655,9 +655,13 @@ sub AddAuthority { $field->update($auth_type_subfield=>$authtypecode); } else { - $record->add_fields($auth_type_tag,'','', $auth_type_subfield=>$authtypecode); + $record->add_fields($auth_type_tag,'','', $auth_type_subfield=>$authtypecode); } + if ( C4::Context->preference('StripWhitespaceChars') ) { + $record = Koha::MetadataRecord::stripWhitespaceChars( $record ); + } + # Save record into auth_header, update 001 my $action; my $authority; diff --git a/C4/Biblio.pm b/C4/Biblio.pm index dbb4026816..cc88b20871 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -114,6 +114,7 @@ use Koha::SearchEngine; use Koha::SearchEngine::Indexer; use Koha::Libraries; use Koha::Util::MARC; +use Koha::MetadataRecord; =head1 NAME @@ -2811,6 +2812,10 @@ sub ModBiblioMarc { $f005->update(sprintf("%4d%02d%02d%02d%02d%04.1f",@a)) if $f005; } + if ( C4::Context->preference('StripWhitespaceChars') ) { + $record = Koha::MetadataRecord::stripWhitespaceChars( $record ); + } + my $metadata = { biblionumber => $biblionumber, format => 'marcxml', diff --git a/Koha/MetadataRecord.pm b/Koha/MetadataRecord.pm index 6f0aa1fb4b..bb7ec62be0 100644 --- a/Koha/MetadataRecord.pm +++ b/Koha/MetadataRecord.pm @@ -108,4 +108,31 @@ sub createMergeHash { } } +=head2 stripWhitespaceChars + + $record = Koha::MetadataRecord::stripWhitespaceChars( $record ); + +Strip leading and trailing whitespace characters from input fields. + +=cut + +sub stripWhitespaceChars { + my ( $record ) = @_; + + foreach my $field ( $record->fields ) { + unless ( $field->is_control_field ) { + foreach my $subfield ( $field->subfields ) { + my $key = $subfield->[0]; + my $value = $subfield->[1]; + $value =~ s/[\n\r]+/ /g; + $value =~ s/^\s+|\s+$//g; + $field->add_subfields( $key => $value ); # add subfield to the end of the subfield list + $field->delete_subfield( pos => 0 ); # delete the subfield at the top of the subfield list + } + } + } + + return $record; +} + 1; diff --git a/installer/data/mysql/atomicupdate/bug_30358_-_add_StripWhitespaceChars_syspref.pl b/installer/data/mysql/atomicupdate/bug_30358_-_add_StripWhitespaceChars_syspref.pl new file mode 100644 index 0000000000..1a6f0356e9 --- /dev/null +++ b/installer/data/mysql/atomicupdate/bug_30358_-_add_StripWhitespaceChars_syspref.pl @@ -0,0 +1,12 @@ +use Modern::Perl; + +return { + bug_number => "30358", + description => "Add new system preference StripWhitespaceChars", + up => sub { + my ($args) = @_; + my ($dbh, $out) = @$args{qw(dbh out)}; + + $dbh->do(q{INSERT IGNORE INTO systempreferences (variable,value,options,explanation,type) VALUES ('StripWhitespaceChars', '0', NULL, 'Strip leading and trailing whitespace characters and inner newlines from input fields when cataloguing bibliographic and authority records.', 'YesNo') }); + }, +}; diff --git a/installer/data/mysql/mandatory/sysprefs.sql b/installer/data/mysql/mandatory/sysprefs.sql index 2f1b46d889..7a7e9ab821 100644 --- a/installer/data/mysql/mandatory/sysprefs.sql +++ b/installer/data/mysql/mandatory/sysprefs.sql @@ -699,6 +699,7 @@ INSERT INTO systempreferences ( `variable`, `value`, `options`, `explanation`, ` ('StatisticsFields','location|itype|ccode', NULL, 'Define Fields (from the items table) used for statistics members','Free'), ('StockRotation','0',NULL,'If ON, enables the stock rotation module','YesNo'), ('StoreLastBorrower','0','','If ON, the last borrower to return an item will be stored in items.last_returned_by','YesNo'), +('StripWhitespaceChars','0',NULL,'Strip leading and trailing whitespace characters and inner newlines from input fields when cataloguing bibliographic and authority records.','YesNo'), ('SubfieldsToAllowForRestrictedBatchmod','','Define a list of subfields for which edition is authorized when items_batchmod_restricted permission is enabled, separated by spaces. Example: 995\$f 995\$h 995\$j',NULL,'Free'), ('SubfieldsToAllowForRestrictedEditing','','Define a list of subfields for which edition is authorized when edit_items_restricted permission is enabled, separated by spaces. Example: 995\$f 995\$h 995\$j',NULL,'Free'), ('SubfieldsToUseWhenPrefill','','','Define a list of subfields to use when prefilling items (separated by space)','Free'), diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref index 509153df23..611edf03f4 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref @@ -205,6 +205,12 @@ Cataloging: - pref: ContentWarningField - for storing content warnings. - "
NOTE: The field needs to appear in the MARC frameworks to be accessible." + - + - pref: StripWhitespaceChars + choices: + 1: Strip + 0: "Don't strip" + - leading and trailing whitespace characters (including spaces, tabs, line breaks and carriage returns) and inner newlines from data fields when cataloguing bibliographic and authority records. The leader and control fields will not be affected. Display: - - 'Separate main entry and subdivisions with ' diff --git a/t/Koha_MetadataRecord.t b/t/Koha_MetadataRecord.t index 0bcc4690dc..7202f4a86f 100755 --- a/t/Koha_MetadataRecord.t +++ b/t/Koha_MetadataRecord.t @@ -19,7 +19,7 @@ use Modern::Perl; -use Test::More tests => 5; +use Test::More tests => 6; use Test::Warn; use MARC::Record; @@ -143,3 +143,24 @@ subtest "new() tests" => sub { is( $metadata_record, undef, 'record object mandatory') }; +subtest "stripWhitespaceChars() tests" => sub { + plan tests => 2; + + # Test default values with a MARC::Record record + my $record = MARC::Record->new(); + + $record->add_fields( + [ '001', '1234' ], + [ '150', ' ', ' ', a => 'Test' ], + [ '520', ' ', ' ', a => "This is\na test!\t" ], + [ '521', ' ', ' ', a => "This is a\t test!\t" ], + ); + + $record = Koha::MetadataRecord::stripWhitespaceChars( $record ); + + my $get520a = $record->subfield('520','a'); + is( $get520a, "This is a test!", "Whitespace characters are appropriately stripped or replaced with spaces" ); + + my $get521a = $record->subfield('521','a'); + is( $get521a, "This is a\t test!", "Trailing tabs are stripped while inner tabs are kept" ); +}; diff --git a/t/db_dependent/Biblio/ModBiblioMarc.t b/t/db_dependent/Biblio/ModBiblioMarc.t index ac56746d2d..fc38684c39 100755 --- a/t/db_dependent/Biblio/ModBiblioMarc.t +++ b/t/db_dependent/Biblio/ModBiblioMarc.t @@ -17,7 +17,7 @@ use Modern::Perl; -use Test::More tests => 1; +use Test::More tests => 2; use t::lib::Mocks; use t::lib::TestBuilder; use MARC::Record; @@ -47,4 +47,38 @@ subtest "Check MARC field length calculation" => sub { like( substr($savedrec->leader,12,5), qr/^\d{5}$/, 'Base address found' ); }; +subtest "StripWhitespaceChars tests" => sub { + plan tests => 4; + + t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); + t::lib::Mocks::mock_preference('StripWhitespaceChars', 0); + + my $biblio = t::lib::TestBuilder->new->build_sample_biblio; + my $record = MARC::Record->new; + $record->append_fields( + MARC::Field->new( '003', "abcdefg\n" ), + MARC::Field->new( '245', '', '', a => " My\ntitle\n" ), + ); + + my $title = $record->title; + is( $title, " My\ntitle\n", 'Title has whitespace characters' ); + + C4::Biblio::ModBiblioMarc( $record, $biblio->biblionumber ); + $biblio = Koha::Biblios->find( $biblio->biblionumber ); + my $savedrec = $biblio->metadata->record; + my $savedtitle = $savedrec->title; + is( $savedtitle, " My\ntitle\n", "Title still has whitespace characters because StripWhitespaceChars is disabled" ); + + t::lib::Mocks::mock_preference('StripWhitespaceChars', 1); + + C4::Biblio::ModBiblioMarc( $record, $biblio->biblionumber ); + $biblio = Koha::Biblios->find( $biblio->biblionumber ); + my $amendedrec = $biblio->metadata->record; + my $amendedtitle = $amendedrec->title; + is( $amendedtitle, "My title", "Whitespace characters removed from title because StripWhitespaceChars is enabled" ); + + my $f003 = $record->field('003')->data; + is( $f003, "abcdefg\n", "Whitespace characters are not stripped from control fields" ); +}; + $schema->storage->txn_rollback; -- 2.39.2