From bb6f4f13ce94231edbbdb5530f1d9734e02fc526 Mon Sep 17 00:00:00 2001
From: Nick Clemens <nick@bywatersolutions.com>
Date: Tue, 16 Jul 2019 13:11:06 +0000
Subject: [PATCH] Bug 23324: Add an ISBN normalization routine

To test:
1 - Set SearchEngine to ElasticSearch
2 - Stage the sample file (import it if it doesn't already exist in your catalog and then stage again)
3 - Set matching rule to ISBN
4 - No matches found
5 - Apply patch
6 - Apply no matching rule
7 - Change the ISBN matching rule to use ISBN normalizer
8 - Apply matching rule for ISBN
9 - It matches!

Signed-off-by: Ron Houk <rhouk@ottumwapubliclibrary.org>
Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
(cherry picked from commit 3d15819443bd114a5e61a7a9ee947a3307d39b8f)
Signed-off-by: Fridolin Somers <fridolin.somers@biblibre.com>
---
 C4/Matcher.pm                                 |  8 ++-
 Koha/Util/Normalize.pm                        | 21 ++++++++
 .../prog/en/modules/admin/matching-rules.tt   |  1 +
 t/Matcher.t                                   | 49 ++++++++++++++++++-
 4 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/C4/Matcher.pm b/C4/Matcher.pm
index 55b749e697..7222b4abdb 100644
--- a/C4/Matcher.pm
+++ b/C4/Matcher.pm
@@ -24,7 +24,7 @@ use MARC::Record;
 use Koha::SearchEngine;
 use Koha::SearchEngine::Search;
 use Koha::SearchEngine::QueryBuilder;
-use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case/;
+use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case ISBN/;
 
 =head1 NAME
 
@@ -867,6 +867,9 @@ sub _get_match_keys {
                     elsif ( $norm eq 'legacy_default' ) {
                         $key = legacy_default($key);
                     }
+                    elsif ( $norm eq 'ISBN' ) {
+                        $key = ISBN($key);
+                    }
                 } else {
                     warn "Invalid normalization routine required ($norm)"
                         unless $norm eq 'none';
@@ -903,7 +906,8 @@ sub valid_normalization_routines {
         'remove_spaces',
         'upper_case',
         'lower_case',
-        'legacy_default'
+        'legacy_default',
+        'ISBN'
     );
 }
 
diff --git a/Koha/Util/Normalize.pm b/Koha/Util/Normalize.pm
index a654c3fd87..efe4839d4e 100644
--- a/Koha/Util/Normalize.pm
+++ b/Koha/Util/Normalize.pm
@@ -18,6 +18,7 @@ package Koha::Util::Normalize;
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 
 use Modern::Perl;
+use Business::ISBN;
 
 use parent qw( Exporter );
 
@@ -26,6 +27,7 @@ our @EXPORT = qw(
   remove_spaces
   upper_case
   lower_case
+  ISBN
 );
 
 =head1 NAME
@@ -99,6 +101,25 @@ sub lower_case {
     return $string;
 }
 
+=head2 ISBN
+
+Normalization function converting ISBN strings to ISBN13
+If string is not a valid ISBN we pass it through unaltered
+
+=cut
+
+sub ISBN {
+    my ( $string ) = @_;
+    return if !defined( $string ); # nothing to normalize: bare return (undef in scalar context)
+
+    my $isbn = Business::ISBN->new($string); # checked for definedness below before use
+    if (defined $isbn && $isbn->is_valid) {
+        $string = $isbn->as_isbn13->as_string([]); # ISBN-13 form; empty position list => no hyphens
+    }
+
+    return $string; # the ISBN-13 string, or the input unchanged when it is not a valid ISBN
+}
+
 1;
 __END__
 
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt
index f2fdcd3f98..9eccfaa329 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt
@@ -23,6 +23,7 @@
 [%        CASE 'upper_case'     %]Uppercase
 [%        CASE 'lower_case'     %]Lowercase
 [%        CASE 'legacy_default' %]Legacy default
+[%        CASE 'ISBN' %]ISBN
 [%        CASE %][% norm | html %]
 [%    END %]
 [% END %]
diff --git a/t/Matcher.t b/t/Matcher.t
index 3a5494e26e..e7dbeee6a3 100755
--- a/t/Matcher.t
+++ b/t/Matcher.t
@@ -82,7 +82,7 @@ is( $testmatcher->description(), 'match on ISSN', 'testing code accessor' );
 
 subtest '_get_match_keys() tests' => sub {
 
-    plan tests => 17;
+    plan tests => 20;
 
     my $matchpoint = get_title_matchpoint({
         length => 0,
@@ -92,6 +92,12 @@ subtest '_get_match_keys() tests' => sub {
 
     my $record = MARC::Record->new();
     $record->append_fields(
+        MARC::Field->new('020', '1', ' ',
+                            a => '978-1451697216 (alk. paper)'),
+        MARC::Field->new('020', '1', ' ',
+                            a => '145169721X (alk. paper)'),
+        MARC::Field->new('020', '1', ' ',
+                            a => '1NOTISBN3'),
         MARC::Field->new('100', '1', ' ',
                             a => 'King, Stephen',
                             d => 'd1947-'),
@@ -242,6 +248,20 @@ subtest '_get_match_keys() tests' => sub {
 
     is( $keys[0], '  .; THE T[]:,ALIS(M)/AN\'" STEPHEN KING, PETER STRAUB.',
         'Match key correctly normalized if invalid normalization routine specified' );
+
+    $matchpoint = get_isbn_matchpoint({
+        length => 0,
+        norms  => [ 'ISBN' ],
+        offset => 0
+    });
+    @keys = C4::Matcher::_get_match_keys( $record, $matchpoint );
+    is( $keys[0], '9781451697216',
+        'Match key correctly calculated as ISBN13 when ISBN normalizer used');
+    is( $keys[1], '9781451697216',
+        'Match key correctly calculated as ISBN13 when ISBN normalizer used');
+    is( $keys[2], '1NOTISBN3',
+        'Match key passed through if not an isbn when ISBN normalizer used');
+
 };
 
 sub get_title_matchpoint {
@@ -311,3 +331,30 @@ sub get_authors_matchpoint {
     return $matchpoint;
 }
 
+sub get_isbn_matchpoint { # test helper: builds a single-component matchpoint on 020$a
+
+    my $params = shift; # hashref with optional keys: length, norms, offset
+
+    my $length = $params->{length} // 0;  # each value falls back to a default when undef
+    my $norms  = $params->{norms}  // [];
+    my $offset = $params->{offset} // 0;
+
+    my $matchpoint = {
+        components =>  [
+            {
+                length    => $length,
+                norms     => $norms, # arrayref of normalization routine names, e.g. [ 'ISBN' ]
+                offset    => $offset,
+                subfields =>
+                    {
+                        a => 1
+                    },
+                tag => '020'
+            },
+        ],
+        index => "isbn",
+        score => 1000
+    };
+
+    return $matchpoint; # hashref in the shape passed to C4::Matcher::_get_match_keys
+}
-- 
2.39.5