From ed4b60fa1dca40d0d0bcf4880fd5f2892a15f90f Mon Sep 17 00:00:00 2001 From: Kyle M Hall Date: Thu, 20 Jun 2013 10:45:44 -0400 Subject: [PATCH] Bug 10500: Improve ISBN matching when importing records Test Plan: 1) Catalog a record with the ISBN "0394502884 (Random House)" 2) Export the record, edit it so the ISBN is now "0394502884 (UnRandomHouse)" 3) Using the record import tool, import this record with matching on ISBN. 4) You should not find a match 5) Apply this patch 6) Run updatedatabase.pl 7) Enable the new system preference AggressiveMatchOnISBN 8) Repeat step 3 9) The tool should now find a match Signed-off-by: Tom McMurdo Signed-off-by: Jonathan Druart Signed-off-by: Martin Renvoize Signed-off-by: Galen Charlton --- C4/Koha.pm | 111 ++++++++++++++++-- C4/Matcher.pm | 13 +- installer/data/mysql/sysprefs.sql | 1 + installer/data/mysql/updatedatabase.pl | 20 ++++ .../admin/preferences/cataloguing.pref | 8 ++ t/Koha.t | 8 +- 6 files changed, 151 insertions(+), 10 deletions(-) diff --git a/C4/Koha.pm b/C4/Koha.pm index 68652ff53f..062a98f96a 100644 --- a/C4/Koha.pm +++ b/C4/Koha.pm @@ -28,6 +28,7 @@ use C4::Branch qw(GetBranchesCount); use Koha::DateUtils qw(dt_from_string); use Memoize; use DateTime::Format::MySQL; +use Business::ISBN; use autouse 'Data::Dumper' => qw(Dumper); use DBI qw(:sql_types); @@ -71,6 +72,10 @@ BEGIN { &GetNormalizedOCLCNumber &xml_escape + &GetVariationsOfISBN + &GetVariationsOfISBNs + &NormalizeISBN + $DEBUG ); $DEBUG = 0; @@ -1552,15 +1557,107 @@ sub _normalize_match_point { } sub _isbn_cleanup { - require Business::ISBN; - my $isbn = Business::ISBN->new( $_[0] ); - if ( $isbn ) { - $isbn = $isbn->as_isbn10 if $isbn->type eq 'ISBN13'; - if (defined $isbn) { - return $isbn->as_string([]); + my ($isbn) = @_; + return NormalizeISBN( + { + isbn => $isbn, + format => 'ISBN-10', + strip_hyphens => 1, } + ); +} + +=head2 NormalizedISBN + + my $isbns = NormalizedISBN({ + isbn => $isbn, + strip_hyphens => [0,1], + format => ['ISBN-10', 'ISBN-13'] + }); + + Returns an isbn validated by Business::ISBN. + Optionally strips hyphens and/or forces the isbn + to be of the specified format. + + If the string cannot be validated as an isbn, + it returns nothing. + +=cut + +sub NormalizeISBN { + my ($params) = @_; + + my $string = $params->{isbn}; + my $strip_hyphens = $params->{strip_hyphens}; + my $format = $params->{format}; + + my $isbn = Business::ISBN->new($string); + + if ( $isbn && $isbn->error != Business::ISBN::BAD_ISBN ) { + + if ( $format eq 'ISBN-10' ) { + $isbn = $isbn->as_isbn10(); + } + elsif ( $format eq 'ISBN-13' ) { + $isbn = $isbn->as_isbn13(); + } + + if ($strip_hyphens) { + $string = $isbn->as_string( [] ); + } else { + $string = $isbn->as_string(); + } + + return $string; } - return; +} + +=head2 GetVariationsOfISBN + + my @isbns = GetVariationsOfISBN( $isbn ); + + Returns a list of varations of the given isbn in + both ISBN-10 and ISBN-13 formats, with and without + hyphens. + + In a scalar context, the isbns are returned as a + string delimited by ' | '. + +=cut + +sub GetVariationsOfISBN { + my ($isbn) = @_; + + my @isbns; + + push( @isbns, NormalizeISBN({ isbn => $isbn }) ); + push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-10' }) ); + push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-13' }) ); + push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-10', strip_hyphens => 1 }) ); + push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-13', strip_hyphens => 1 }) ); + + return wantarray ? @isbns : join( " | ", @isbns ); +} + +=head2 GetVariationsOfISBNs + + my @isbns = GetVariationsOfISBNs( @isbns ); + + Returns a list of varations of the given isbns in + both ISBN-10 and ISBN-13 formats, with and without + hyphens. + + In a scalar context, the isbns are returned as a + string delimited by ' | '. + +=cut + +sub GetVariationsOfISBNs { + my (@isbns) = @_; + + @isbns = map { GetVariationsOfISBN( $_ ) } @isbns; + + return wantarray ? @isbns : join( " | ", @isbns ); } 1; diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 3615aa1ef2..5c27fa3615 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -630,23 +630,32 @@ sub get_matches { $QParser = C4::Context->queryparser if (C4::Context->preference('UseQueryParser')); foreach my $matchpoint ( @{ $self->{'matchpoints'} } ) { my @source_keys = _get_match_keys( $source_record, $matchpoint ); + next if scalar(@source_keys) == 0; + @source_keys = C4::Koha::GetVariationsOfISBNs(@source_keys) + if ( $matchpoint->{index} eq 'isbn' + && C4::Context->preference('AggressiveMatchOnISBN') ); + # build query my $query; my $error; my $searchresults; my $total_hits; if ( $self->{'record_type'} eq 'biblio' ) { + my $phr = C4::Context->preference('AggressiveMatchOnISBN') ? ',phr' : q{}; + if ($QParser) { $query = join( " || ", - map { "$matchpoint->{'index'}:$_" } @source_keys ); + map { "$matchpoint->{'index'}$phr:$_" } @source_keys ); } else { $query = join( " or ", - map { "$matchpoint->{'index'}=$_" } @source_keys ); + map { "$matchpoint->{'index'}$phr=$_" } @source_keys ); } + require C4::Search; + ( $error, $searchresults, $total_hits ) = C4::Search::SimpleSearch( $query, 0, $max_matches ); } diff --git a/installer/data/mysql/sysprefs.sql b/installer/data/mysql/sysprefs.sql index 6e073ec25c..b06dd72a6e 100644 --- a/installer/data/mysql/sysprefs.sql +++ b/installer/data/mysql/sysprefs.sql @@ -10,6 +10,7 @@ INSERT INTO systempreferences ( `variable`, `value`, `options`, `explanation`, ` ('AdvancedSearchTypes','itemtypes','itemtypes|ccode','Select which set of fields comprise the Type limit in the advanced search','Choice'), ('AgeRestrictionMarker','',NULL,'Markers for age restriction indication, e.g. FSK|PEGI|Age|','free'), ('AgeRestrictionOverride','0',NULL,'Allow staff to check out an item with age restriction.','YesNo'), +('AggressiveMatchOnISBN','0','If enabled, attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records when matching on ISBN with the record import tool','YesNo'), ('AllFinesNeedOverride','1','0','If on, staff will be asked to override every fine, even if it is below noissuescharge.','YesNo'), ('AllowAllMessageDeletion','0','','Allow any Library to delete any message','YesNo'), ('AllowFineOverride','0','0','If on, staff will be able to issue books to patrons with fines greater than noissuescharge.','YesNo'), diff --git a/installer/data/mysql/updatedatabase.pl b/installer/data/mysql/updatedatabase.pl index 4a0c0792bb..2c4f4953ee 100755 --- a/installer/data/mysql/updatedatabase.pl +++ b/installer/data/mysql/updatedatabase.pl @@ -8439,6 +8439,26 @@ if ( C4::Context->preference("Version") < TransformToNum($DBversion) ) { SetVersion($DBversion); } +$DBversion = "3.15.00.XXX"; +if ( CheckVersion($DBversion) ) { + $dbh->do(" + INSERT INTO systempreferences ( + variable, + value, + explanation, + type + ) VALUES ( + 'AggressiveMatchOnISBN', + '0', + 'If enabled, attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records when matching on ISBN with the record import tool', + 'YesNo' + ) + "); + + print "Upgrade to $DBversion done (Bug 10500 - Improve isbn matching when importing records)\n"; + SetVersion($DBversion); +} + =head1 FUNCTIONS =head2 TableExists($table) diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref index c3bdd49413..a52180f799 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref @@ -191,3 +191,11 @@ Cataloging: yes: Display no: "Don't display" - acquisition details on the biblio detail page. + Importing: + - + - When matching on ISBN with the record import tool, + - pref: AggressiveMatchOnISBN + choices: + yes: "do" + no: "don't" + - attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records. diff --git a/t/Koha.t b/t/Koha.t index 3fde068a13..0cdbbb36fd 100755 --- a/t/Koha.t +++ b/t/Koha.t @@ -3,7 +3,7 @@ use strict; use warnings; use C4::Context; -use Test::More tests => 11; +use Test::More tests => 14; use Test::MockModule; use DBD::Mock; @@ -61,3 +61,9 @@ is(C4::Koha::_isbn_cleanup('0-590-35340-3'), '0590353403', '_isbn_cleanup remove is(C4::Koha::_isbn_cleanup('0590353403 (pbk.)'), '0590353403', '_isbn_cleanup removes parenthetical'); is(C4::Koha::_isbn_cleanup('978-0-321-49694-2'), '0321496949', '_isbn_cleanup converts ISBN-13 to ISBN-10'); +is(C4::Koha::NormalizeISBN({ isbn => '978-0-321-49694-2 (pbk.)', format => 'ISBN-10', strip_hyphens => 1 }), '0321496949', 'Test NormalizeISBN with all features enabled' ); + +my @isbns = qw/ 978-0-321-49694-2 0-321-49694-9 978-0-321-49694-2 0321496949 9780321496942/; +is( join('|', @isbns), join('|', GetVariationsOfISBN('978-0-321-49694-2 (pbk.)')), 'GetVariationsOfISBN returns all variations' ); + +is( join('|', @isbns), join('|', GetVariationsOfISBNs('978-0-321-49694-2 (pbk.)')), 'GetVariationsOfISBNs returns all variations' ); -- 2.39.5