Browse Source

Bug 23324: Add an ISBN normalization routine

To test:
1 - Set SearchEngine to ElasticSearch
2 - Stage the sample file (import it if it doesn't already exist in your catalog and then stage again)
3 - Set matching rule to ISBN
4 - No matches found
5 - Apply patch
6 - Apply no matching rule
7 - Change the ISBN matching rule to use ISBN normalizer
8 - Apply matching rule for ISBN
9 - It matches!

Signed-off-by: Ron Houk <rhouk@ottumwapubliclibrary.org>
Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
remotes/origin/19.11.x
Nick Clemens 2 years ago
committed by Martin Renvoize
parent
commit
3d15819443
Signed by: martin.renvoize GPG Key ID: 422B469130441A0F
  1. 8
      C4/Matcher.pm
  2. 21
      Koha/Util/Normalize.pm
  3. 1
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt
  4. 49
      t/Matcher.t

8
C4/Matcher.pm

@ -24,7 +24,7 @@ use MARC::Record;
use Koha::SearchEngine;
use Koha::SearchEngine::Search;
use Koha::SearchEngine::QueryBuilder;
use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case/;
use Koha::Util::Normalize qw/legacy_default remove_spaces upper_case lower_case ISBN/;
=head1 NAME
@ -866,6 +866,9 @@ sub _get_match_keys {
elsif ( $norm eq 'legacy_default' ) {
$key = legacy_default($key);
}
elsif ( $norm eq 'ISBN' ) {
$key = ISBN($key);
}
} else {
warn "Invalid normalization routine required ($norm)"
unless $norm eq 'none';
@ -902,7 +905,8 @@ sub valid_normalization_routines {
'remove_spaces',
'upper_case',
'lower_case',
'legacy_default'
'legacy_default',
'ISBN'
);
}

21
Koha/Util/Normalize.pm

@ -18,6 +18,7 @@ package Koha::Util::Normalize;
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use Modern::Perl;
use Business::ISBN;
use parent qw( Exporter );
@ -26,6 +27,7 @@ our @EXPORT = qw(
remove_spaces
upper_case
lower_case
ISBN
);
=head1 NAME
@ -99,6 +101,25 @@ sub lower_case {
return $string;
}
=head2 ISBN

    my $normalized = ISBN( $string );

Normalization routine for ISBN match keys. When Business::ISBN can parse
the string and reports it as valid, the ISBN-13 form is returned as a plain
string. Any other defined string is handed back unaltered. An undefined
argument produces an empty return.

=cut

sub ISBN {
    my $candidate = shift;

    # Nothing to normalize: bare return (undef in scalar context).
    return unless defined $candidate;

    my $parsed = Business::ISBN->new($candidate);

    # Unparseable or invalid input is passed through untouched.
    return $candidate unless defined $parsed && $parsed->is_valid;

    # as_string is given an empty position list; kept as a scalar assignment
    # so the call context matches the surrounding scalar usage.
    my $isbn13 = $parsed->as_isbn13->as_string( [] );
    return $isbn13;
}
1;
__END__

1
koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt

@ -23,6 +23,7 @@
[% CASE 'upper_case' %]Uppercase
[% CASE 'lower_case' %]Lowercase
[% CASE 'legacy_default' %]Legacy default
[% CASE 'ISBN' %]ISBN
[% CASE %][% norm | html %]
[% END %]
[% END %]

49
t/Matcher.t

@ -82,7 +82,7 @@ is( $testmatcher->description(), 'match on ISSN', 'testing code accessor' );
subtest '_get_match_keys() tests' => sub {
plan tests => 17;
plan tests => 20;
my $matchpoint = get_title_matchpoint({
length => 0,
@ -92,6 +92,12 @@ subtest '_get_match_keys() tests' => sub {
my $record = MARC::Record->new();
$record->append_fields(
MARC::Field->new('020', '1', ' ',
a => '978-1451697216 (alk. paper)'),
MARC::Field->new('020', '1', ' ',
a => '145169721X (alk. paper)'),
MARC::Field->new('020', '1', ' ',
a => '1NOTISBN3'),
MARC::Field->new('100', '1', ' ',
a => 'King, Stephen',
d => 'd1947-'),
@ -242,6 +248,20 @@ subtest '_get_match_keys() tests' => sub {
is( $keys[0], ' .; THE T[]:,ALIS(M)/AN\'" STEPHEN KING, PETER STRAUB.',
'Match key correctly normalized if invalid normalization routine specified' );
$matchpoint = get_isbn_matchpoint({
length => 0,
norms => [ 'ISBN' ],
offset => 0
});
@keys = C4::Matcher::_get_match_keys( $record, $matchpoint );
is( $keys[0], '9781451697216',
'Match key correctly calculated as ISBN13 when ISBN normalizer used');
is( $keys[1], '9781451697216',
'Match key correctly calculated as ISBN13 when ISBN normalizer used');
is( $keys[2], '1NOTISBN3',
'Match key passed through if not an isbn when ISBN normalizer used');
};
sub get_title_matchpoint {
@ -311,3 +331,30 @@ sub get_authors_matchpoint {
return $matchpoint;
}
# Build a matchpoint structure for the "isbn" index, made of a single
# component reading subfield a of tag 020.
#
# Takes a hashref with optional keys:
#   length - component length (defaults to 0)
#   norms  - arrayref of normalization routine names (defaults to [])
#   offset - component offset (defaults to 0)
#
# Returns the matchpoint as a hashref with a fixed score of 1000.
sub get_isbn_matchpoint {
    my ($args) = @_;

    # Defined-or keeps explicit false values (e.g. 0) supplied by the caller.
    my %component = (
        length    => $args->{length} // 0,
        norms     => $args->{norms}  // [],
        offset    => $args->{offset} // 0,
        subfields => { a => 1 },
        tag       => '020',
    );

    return {
        components => [ \%component ],
        index      => "isbn",
        score      => 1000,
    };
}
Loading…
Cancel
Save