Browse Source

Bug 10500: Improve ISBN matching when importing records

Test Plan:
1) Catalog a record with the ISBN "0394502884 (Random House)"
2) Export the record, edit it so the ISBN is now
   "0394502884 (UnRandomHouse)"
3) Using the record import tool, import this record with matching
   on ISBN.
4) You should not find a match
5) Apply this patch
6) Run updatedatabase.pl
7) Enable the new system preference AggressiveMatchOnISBN
8) Repeat step 3
9) The tool should now find a match

Signed-off-by: Tom McMurdo <thomas.mcmurdo@state.vp.us>
Signed-off-by: Jonathan Druart <jonathan.druart@biblibre.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Galen Charlton <gmc@esilibrary.com>
3.16.x
Kyle Hall 11 years ago
committed by Galen Charlton
parent
commit
ed4b60fa1d
  1. 111
      C4/Koha.pm
  2. 13
      C4/Matcher.pm
  3. 1
      installer/data/mysql/sysprefs.sql
  4. 20
      installer/data/mysql/updatedatabase.pl
  5. 8
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref
  6. 8
      t/Koha.t

111
C4/Koha.pm

@ -28,6 +28,7 @@ use C4::Branch qw(GetBranchesCount);
use Koha::DateUtils qw(dt_from_string);
use Memoize;
use DateTime::Format::MySQL;
use Business::ISBN;
use autouse 'Data::Dumper' => qw(Dumper);
use DBI qw(:sql_types);
@ -71,6 +72,10 @@ BEGIN {
&GetNormalizedOCLCNumber
&xml_escape
&GetVariationsOfISBN
&GetVariationsOfISBNs
&NormalizeISBN
$DEBUG
);
$DEBUG = 0;
@ -1552,15 +1557,107 @@ sub _normalize_match_point {
}
sub _isbn_cleanup {
require Business::ISBN;
my $isbn = Business::ISBN->new( $_[0] );
if ( $isbn ) {
$isbn = $isbn->as_isbn10 if $isbn->type eq 'ISBN13';
if (defined $isbn) {
return $isbn->as_string([]);
my ($isbn) = @_;
return NormalizeISBN(
{
isbn => $isbn,
format => 'ISBN-10',
strip_hyphens => 1,
}
);
}
=head2 NormalizedISBN
my $isbns = NormalizedISBN({
isbn => $isbn,
strip_hyphens => [0,1],
format => ['ISBN-10', 'ISBN-13']
});
Returns an isbn validated by Business::ISBN.
Optionally strips hyphens and/or forces the isbn
to be of the specified format.
If the string cannot be validated as an isbn,
it returns nothing.
=cut
sub NormalizeISBN {
my ($params) = @_;
my $string = $params->{isbn};
my $strip_hyphens = $params->{strip_hyphens};
my $format = $params->{format};
my $isbn = Business::ISBN->new($string);
if ( $isbn && $isbn->error != Business::ISBN::BAD_ISBN ) {
if ( $format eq 'ISBN-10' ) {
$isbn = $isbn->as_isbn10();
}
elsif ( $format eq 'ISBN-13' ) {
$isbn = $isbn->as_isbn13();
}
if ($strip_hyphens) {
$string = $isbn->as_string( [] );
} else {
$string = $isbn->as_string();
}
return $string;
}
return;
}
=head2 GetVariationsOfISBN
my @isbns = GetVariationsOfISBN( $isbn );
Returns a list of varations of the given isbn in
both ISBN-10 and ISBN-13 formats, with and without
hyphens.
In a scalar context, the isbns are returned as a
string delimited by ' | '.
=cut
sub GetVariationsOfISBN {
my ($isbn) = @_;
my @isbns;
push( @isbns, NormalizeISBN({ isbn => $isbn }) );
push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-10' }) );
push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-13' }) );
push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-10', strip_hyphens => 1 }) );
push( @isbns, NormalizeISBN({ isbn => $isbn, format => 'ISBN-13', strip_hyphens => 1 }) );
return wantarray ? @isbns : join( " | ", @isbns );
}
=head2 GetVariationsOfISBNs
my @isbns = GetVariationsOfISBNs( @isbns );
Returns a list of varations of the given isbns in
both ISBN-10 and ISBN-13 formats, with and without
hyphens.
In a scalar context, the isbns are returned as a
string delimited by ' | '.
=cut
sub GetVariationsOfISBNs {
my (@isbns) = @_;
@isbns = map { GetVariationsOfISBN( $_ ) } @isbns;
return wantarray ? @isbns : join( " | ", @isbns );
}
1;

13
C4/Matcher.pm

@ -630,23 +630,32 @@ sub get_matches {
$QParser = C4::Context->queryparser if (C4::Context->preference('UseQueryParser'));
foreach my $matchpoint ( @{ $self->{'matchpoints'} } ) {
my @source_keys = _get_match_keys( $source_record, $matchpoint );
next if scalar(@source_keys) == 0;
@source_keys = C4::Koha::GetVariationsOfISBNs(@source_keys)
if ( $matchpoint->{index} eq 'isbn'
&& C4::Context->preference('AggressiveMatchOnISBN') );
# build query
my $query;
my $error;
my $searchresults;
my $total_hits;
if ( $self->{'record_type'} eq 'biblio' ) {
my $phr = C4::Context->preference('AggressiveMatchOnISBN') ? ',phr' : q{};
if ($QParser) {
$query = join( " || ",
map { "$matchpoint->{'index'}:$_" } @source_keys );
map { "$matchpoint->{'index'}$phr:$_" } @source_keys );
}
else {
$query = join( " or ",
map { "$matchpoint->{'index'}=$_" } @source_keys );
map { "$matchpoint->{'index'}$phr=$_" } @source_keys );
}
require C4::Search;
( $error, $searchresults, $total_hits ) =
C4::Search::SimpleSearch( $query, 0, $max_matches );
}

1
installer/data/mysql/sysprefs.sql

@ -10,6 +10,7 @@ INSERT INTO systempreferences ( `variable`, `value`, `options`, `explanation`, `
('AdvancedSearchTypes','itemtypes','itemtypes|ccode','Select which set of fields comprise the Type limit in the advanced search','Choice'),
('AgeRestrictionMarker','',NULL,'Markers for age restriction indication, e.g. FSK|PEGI|Age|','free'),
('AgeRestrictionOverride','0',NULL,'Allow staff to check out an item with age restriction.','YesNo'),
('AggressiveMatchOnISBN','0','If enabled, attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records when matching on ISBN with the record import tool','YesNo'),
('AllFinesNeedOverride','1','0','If on, staff will be asked to override every fine, even if it is below noissuescharge.','YesNo'),
('AllowAllMessageDeletion','0','','Allow any Library to delete any message','YesNo'),
('AllowFineOverride','0','0','If on, staff will be able to issue books to patrons with fines greater than noissuescharge.','YesNo'),

20
installer/data/mysql/updatedatabase.pl

@ -8439,6 +8439,26 @@ if ( C4::Context->preference("Version") < TransformToNum($DBversion) ) {
SetVersion($DBversion);
}
$DBversion = "3.15.00.XXX";
if ( CheckVersion($DBversion) ) {
$dbh->do("
INSERT INTO systempreferences (
variable,
value,
explanation,
type
) VALUES (
'AggressiveMatchOnISBN',
'0',
'If enabled, attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records when matching on ISBN with the record import tool',
'YesNo'
)
");
print "Upgrade to $DBversion done (Bug 10500 - Improve isbn matching when importing records)\n";
SetVersion($DBversion);
}
=head1 FUNCTIONS
=head2 TableExists($table)

8
koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/cataloguing.pref

@ -191,3 +191,11 @@ Cataloging:
yes: Display
no: "Don't display"
- acquisition details on the biblio detail page.
Importing:
-
- When matching on ISBN with the record import tool,
- pref: AggressiveMatchOnISBN
choices:
yes: "do"
no: "don't"
- attempt to match aggressively by trying all variations of the ISBNs in the imported record as a phrase in the ISBN fields of already cataloged records.

8
t/Koha.t

@ -3,7 +3,7 @@ use strict;
use warnings;
use C4::Context;
use Test::More tests => 11;
use Test::More tests => 14;
use Test::MockModule;
use DBD::Mock;
@ -61,3 +61,9 @@ is(C4::Koha::_isbn_cleanup('0-590-35340-3'), '0590353403', '_isbn_cleanup remove
is(C4::Koha::_isbn_cleanup('0590353403 (pbk.)'), '0590353403', '_isbn_cleanup removes parenthetical');
is(C4::Koha::_isbn_cleanup('978-0-321-49694-2'), '0321496949', '_isbn_cleanup converts ISBN-13 to ISBN-10');
is(C4::Koha::NormalizeISBN({ isbn => '978-0-321-49694-2 (pbk.)', format => 'ISBN-10', strip_hyphens => 1 }), '0321496949', 'Test NormalizeISBN with all features enabled' );
my @isbns = qw/ 978-0-321-49694-2 0-321-49694-9 978-0-321-49694-2 0321496949 9780321496942/;
is( join('|', @isbns), join('|', GetVariationsOfISBN('978-0-321-49694-2 (pbk.)')), 'GetVariationsOfISBN returns all variations' );
is( join('|', @isbns), join('|', GetVariationsOfISBNs('978-0-321-49694-2 (pbk.)')), 'GetVariationsOfISBNs returns all variations' );

Loading…
Cancel
Save