From bb6f4f13ce94231edbbdb5530f1d9734e02fc526 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Tue, 16 Jul 2019 13:11:06 +0000 Subject: [PATCH] Bug 23324: Add an ISBN normalization routine To test: 1 - Set SearchEngine to ElasticSearch 2 - Stage the sample file (import it if it doesn't already exist in your catalog and then stage again) 3 - Set matching rule to ISBN 4 - No matches found 5 - Apply patch 6 - Apply no matching rule 7 - Change the ISBN matching rule to use ISBN normalizer 8 - Apply matching rule for ISBN 9 - It matches! Signed-off-by: Ron Houk Signed-off-by: Marcel de Rooy Signed-off-by: Martin Renvoize (cherry picked from commit 3d15819443bd114a5e61a7a9ee947a3307d39b8f) Signed-off-by: Fridolin Somers --- C4/Matcher.pm | 8 ++- Koha/Util/Normalize.pm | 21 ++++++++ .../prog/en/modules/admin/matching-rules.tt | 1 + t/Matcher.t | 49 ++++++++++++++++++- 4 files changed, 76 insertions(+), 3 deletions(-) diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 55b749e697..7222b4abdb 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -24,7 +24,7 @@ use MARC::Record; use Koha::SearchEngine; use Koha::SearchEngine::Search; use Koha::SearchEngine::QueryBuilder; -use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case/; +use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case ISBN/; =head1 NAME @@ -867,6 +867,9 @@ sub _get_match_keys { elsif ( $norm eq 'legacy_default' ) { $key = legacy_default($key); } + elsif ( $norm eq 'ISBN' ) { + $key = ISBN($key); + } } else { warn "Invalid normalization routine required ($norm)" unless $norm eq 'none'; @@ -903,7 +906,8 @@ sub valid_normalization_routines { 'remove_spaces', 'upper_case', 'lower_case', - 'legacy_default' + 'legacy_default', + 'ISBN' ); } diff --git a/Koha/Util/Normalize.pm b/Koha/Util/Normalize.pm index a654c3fd87..efe4839d4e 100644 --- a/Koha/Util/Normalize.pm +++ b/Koha/Util/Normalize.pm @@ -18,6 +18,7 @@ package Koha::Util::Normalize; # 51 Franklin Street, Fifth 
Floor, Boston, MA 02110-1301 USA. use Modern::Perl; +use Business::ISBN; use parent qw( Exporter ); @@ -26,6 +27,7 @@ our @EXPORT = qw( remove_spaces upper_case lower_case + ISBN ); =head1 NAME @@ -99,6 +101,25 @@ sub lower_case { return $string; } +=head2 ISBN + +Normalization function converting ISBN strings to ISBN13 +If string is not a valid ISBN we pass it through unaltered + +=cut + +sub ISBN { + my ( $string ) = @_; + return if !defined( $string ); + + my $isbn = Business::ISBN->new($string); + if (defined $isbn && $isbn->is_valid) { + $string = $isbn->as_isbn13->as_string([]); + } + + return $string; +} + 1; __END__ diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt index f2fdcd3f98..9eccfaa329 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt @@ -23,6 +23,7 @@ [% CASE 'upper_case' %]Uppercase [% CASE 'lower_case' %]Lowercase [% CASE 'legacy_default' %]Legacy default +[% CASE 'ISBN' %]ISBN [% CASE %][% norm | html %] [% END %] [% END %] diff --git a/t/Matcher.t b/t/Matcher.t index 3a5494e26e..e7dbeee6a3 100755 --- a/t/Matcher.t +++ b/t/Matcher.t @@ -82,7 +82,7 @@ is( $testmatcher->description(), 'match on ISSN', 'testing code accessor' ); subtest '_get_match_keys() tests' => sub { - plan tests => 17; + plan tests => 20; my $matchpoint = get_title_matchpoint({ length => 0, @@ -92,6 +92,12 @@ subtest '_get_match_keys() tests' => sub { my $record = MARC::Record->new(); $record->append_fields( + MARC::Field->new('020', '1', ' ', + a => '978-1451697216 (alk. paper)'), + MARC::Field->new('020', '1', ' ', + a => '145169721X (alk. 
paper)'), + MARC::Field->new('020', '1', ' ', + a => '1NOTISBN3'), MARC::Field->new('100', '1', ' ', a => 'King, Stephen', d => 'd1947-'), @@ -242,6 +248,20 @@ subtest '_get_match_keys() tests' => sub { is( $keys[0], ' .; THE T[]:,ALIS(M)/AN\'" STEPHEN KING, PETER STRAUB.', 'Match key correctly normalized if invalid normalization routine specified' ); + + $matchpoint = get_isbn_matchpoint({ + length => 0, + norms => [ 'ISBN' ], + offset => 0 + }); + @keys = C4::Matcher::_get_match_keys( $record, $matchpoint ); + is( $keys[0], '9781451697216', + 'Match key correctly calculated as ISBN13 when ISBN normalizer used'); + is( $keys[1], '9781451697216', + 'Match key correctly calculated as ISBN13 when ISBN normalizer used'); + is( $keys[2], '1NOTISBN3', + 'Match key passed through if not an isbn when ISBN normalizer used'); + }; sub get_title_matchpoint { @@ -311,3 +331,30 @@ sub get_authors_matchpoint { return $matchpoint; } +sub get_isbn_matchpoint { + + my $params = shift; + + my $length = $params->{length} // 0; + my $norms = $params->{norms} // []; + my $offset = $params->{offset} // 0; + + my $matchpoint = { + components => [ + { + length => $length, + norms => $norms, + offset => $offset, + subfields => + { + a => 1 + }, + tag => '020' + }, + ], + index => "isbn", + score => 1000 + }; + + return $matchpoint; +} -- 2.39.5