From 68ced964fd254e7cba67a9b1b867907c557888c4 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Wed, 30 Dec 2015 23:14:37 +0000 Subject: [PATCH] Bug 14629 - Add aggressive ISSN matching feature equivalent to the aggressive ISBN matcher This patch adds a syspref "AggressiveMatchOnISSN" allowing for a match of ISSNs with or without hyphens. It uses Business::ISSN in order to follow the use of Business::ISBN and allow for validation of ISSNs To test: 1 - Find a record in your system with an ISSN (or add one) 2 - Stage a record containing the same ISSN but lacking a hyphen 3 - Matching on ISSN should find 0 matches 4 - Repeat with no hyphen ISSN in system and hyphen ISSN in import 5 - Matching should find 0 6 - Apply patch 7 - Update database and install Business::ISSN 8 - Leave AggressiveMatchOnISSN as don't and repeat original tests- no change 9 - Set AggressiveMatchOnISSN as do and repeat original test 10 - You should find a match 11 - prove t/Koha.t - all tests pass Sponsored by North Central Regional Library System (NCRL) www.ncrl.org Signed-off-by: Chad Roseburg Signed-off-by: Marcel de Rooy Signed-off-by: Kyle M Hall --- C4/Installer/PerlDependencies.pm | 5 ++ C4/Koha.pm | 89 +++++++++++++++++++ C4/Matcher.pm | 7 +- ...14629-add_AgressiveMatchOnISSN_syspref.sql | 1 + .../admin/preferences/cataloguing.pref | 8 ++ t/Koha.t | 18 +++- 6 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 installer/data/mysql/atomicupdate/bug_14629-add_AgressiveMatchOnISSN_syspref.sql diff --git a/C4/Installer/PerlDependencies.pm b/C4/Installer/PerlDependencies.pm index 76d6cbdc66..b531e6c97f 100644 --- a/C4/Installer/PerlDependencies.pm +++ b/C4/Installer/PerlDependencies.pm @@ -522,6 +522,11 @@ our $PERL_DEPS = { 'required' => '1', 'min_ver' => '2.05', }, + 'Business::ISSN' => { + 'usage' => 'Core', + 'required' => '1', + 'min_ver' => '0.91', + }, 'Template' => { 'usage' => 'Core', 'required' => '1', diff --git a/C4/Koha.pm b/C4/Koha.pm index fb7f30c25b..3b990299cd 
100644 --- a/C4/Koha.pm +++ b/C4/Koha.pm @@ -31,6 +31,7 @@ use Koha::Libraries; use Koha::MarcSubfieldStructures; use DateTime::Format::MySQL; use Business::ISBN; +use Business::ISSN; use autouse 'Data::cselectall_arrayref' => qw(Dumper); use DBI qw(:sql_types); use vars qw(@ISA @EXPORT @EXPORT_OK $DEBUG); @@ -63,6 +64,9 @@ BEGIN { &GetVariationsOfISBN &GetVariationsOfISBNs &NormalizeISBN + &GetVariationsOfISSN + &GetVariationsOfISSNs + &NormalizeISSN $DEBUG ); @@ -1337,6 +1341,91 @@ sub GetVariationsOfISBNs { return wantarray ? @isbns : join( " | ", @isbns ); } +=head2 NormalizeISSN + + my $issn = NormalizeISSN({ + issn => $issn, + strip_hyphen => [0,1] + }); + + Returns an issn validated by Business::ISSN. + Optionally strips hyphen. + + If the string cannot be validated as an issn, + it returns nothing. + +=cut + +sub NormalizeISSN { + my ($params) = @_; + + my $string = $params->{issn}; + my $strip_hyphen = $params->{strip_hyphen}; + + my $issn = Business::ISSN->new($string); + + if ( $issn && $issn->is_valid ){ + + if ($strip_hyphen) { + $string = $issn->_issn; + } + else { + $string = $issn->as_string; + } + return $string; + } + +} + +=head2 GetVariationsOfISSN + + my @issns = GetVariationsOfISSN( $issn ); + + Returns a list of variations of the given issn, + with and without a hyphen. + + In a scalar context, the issns are returned as a + string delimited by ' | '. + +=cut + +sub GetVariationsOfISSN { + my ($issn) = @_; + + return unless $issn; + + my @issns; + + push( @issns, NormalizeISSN({ issn => $issn }) ); + push( @issns, NormalizeISSN({ issn => $issn, strip_hyphen => 1 }) ); + + # Strip out any "empty" strings from the array + @issns = grep { defined($_) && $_ =~ /\S/ } @issns; + + return wantarray ? @issns : join( " | ", @issns ); +} + +=head2 GetVariationsOfISSNs + + my @issns = GetVariationsOfISSNs( @issns ); + + Returns a list of variations of the given issns, + with and without a hyphen. 
+ + In a scalar context, the issns are returned as a + string delimited by ' | '. + +=cut + +sub GetVariationsOfISSNs { + my (@issns) = @_; + + @issns = map { GetVariationsOfISSN( $_ ) } @issns; + + return wantarray ? @issns : join( " | ", @issns ); +} + + =head2 IsKohaFieldLinked my $is_linked = IsKohaFieldLinked({ diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 3c4c391008..40e6e5f030 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -637,6 +637,11 @@ sub get_matches { && C4::Context->preference('AggressiveMatchOnISBN') ) && !C4::Context->preference('UseQueryParser'); + @source_keys = C4::Koha::GetVariationsOfISSNs(@source_keys) + if ( $matchpoint->{index} =~ /^issn$/i + && C4::Context->preference('AggressiveMatchOnISSN') ) + && !C4::Context->preference('UseQueryParser'); + # build query my $query; my $error; @@ -649,7 +654,7 @@ sub get_matches { map { "$matchpoint->{'index'}:$_" } @source_keys ); } else { - my $phr = C4::Context->preference('AggressiveMatchOnISBN') ? ',phr' : q{}; + my $phr = ( C4::Context->preference('AggressiveMatchOnISBN') || C4::Context->preference('AggressiveMatchOnISSN') ) ? ',phr' : q{}; $query = join( " or ", map { "$matchpoint->{'index'}$phr=\"$_\"" } @source_keys ); #NOTE: double-quote the values so you don't get a "Embedded truncation not supported" error when a term has a ? in it. 
diff --git a/installer/data/mysql/atomicupdate/bug_14629-add_AgressiveMatchOnISSN_syspref.sql b/installer/data/mysql/atomicupdate/bug_14629-add_AgressiveMatchOnISSN_syspref.sql new file mode 100644 index 0000000000..363254baf6 --- /dev/null +++ b/installer/data/mysql/atomicupdate/bug_14629-add_AgressiveMatchOnISSN_syspref.sql @@ -0,0 +1 @@ +INSERT IGNORE INTO systempreferences (variable,value,explanation,options,type) VALUES ('AggressiveMatchOnISSN','0','If enabled, attempt to match aggressively by trying all variations of the ISSNs in the imported record as a phrase in the ISSN fields of already cataloged records when matching on ISSN with the record import tool','','YesNo') diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref index 02d8728104..1aeadf9860 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref @@ -224,6 +224,14 @@ Cataloging: yes: "do" no: "don't" - attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records. Note that this preference has no effect if UseQueryParser is on. + - + - When matching on ISSN with the record import tool, + - pref: AggressiveMatchOnISSN + choices: + yes: "do" + no: "don't" + - attempt to match aggressively by trying all variations of the ISSNs in the imported record as a phrase in the ISSN fields of already cataloged records. Note that this preference has no effect if UseQueryParser is on. 
+ Exporting: - - Include following fields when exporting BibTeX, diff --git a/t/Koha.t b/t/Koha.t index 21cbf68d59..500dcb5c9e 100755 --- a/t/Koha.t +++ b/t/Koha.t @@ -25,7 +25,7 @@ use Module::Load::Conditional qw/check_install/; BEGIN { if ( check_install( module => 'Test::DBIx::Class' ) ) { - plan tests => 31; + plan tests => 37; } else { plan skip_all => "Need Test::DBIx::Class" } @@ -138,4 +138,20 @@ subtest 'getFacets() tests' => sub { ); }; +is(C4::Koha::NormalizeISSN({ issn => '0024-9319', strip_hyphen => 1 }), '00249319', 'Test NormalizeISSN with all features enabled' ); +is(C4::Koha::NormalizeISSN({ issn => '0024-9319', strip_hyphen => 0 }), '0024-9319', 'Test NormalizeISSN with all features enabled' ); + +my @issns = qw/ 0024-9319 00249319 /; +is( join('|', @issns), join('|', GetVariationsOfISSN('0024-9319')), 'GetVariationsOfISSN returns all variations' ); +is( join('|', @issns), join('|', GetVariationsOfISSNs('0024-9319')), 'GetVariationsOfISSNs returns all variations' ); + +my $issn; +eval { + $issn = C4::Koha::NormalizeISSN({ issn => '1234-5678', strip_hyphen => 1 }); +}; +ok($@ eq '', 'NormalizeISSN does not throw exception when parsing invalid ISSN'); + +@issns = GetVariationsOfISSNs('abc'); +is(scalar(@issns), 0, 'zero variations returned of invalid ISSN'); + 1; -- 2.39.5