Browse Source

Bug 20244: Improve Elasticsearch ISBN indexing

https://bugs.koha-community.org/show_bug.cgi?id=20244

Test plan:

1. Add a record with an ISBN-10 or ISBN-13 that can be converted to ISBN-10 (e.g. 1-56619-909-3).
2. Verify that the record can be found by searching for "1-56619-909-3", "1566199093", "978-1-56619-909-4" and "9781566199094".

Signed-off-by: Brendan Gallagher <brendan@bywatersolutions.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
18.11.x
Ere Maijala 6 years ago
committed by Nick Clemens
parent
commit
5a0f2821a2
  1. 32
      Koha/SearchEngine/Elasticsearch.pm
  2. 16
      t/Koha/SearchEngine/Elasticsearch.t

32
Koha/SearchEngine/Elasticsearch.pm

@ -38,6 +38,7 @@ use List::Util qw( sum0 reduce );
use MARC::File::XML;
use MIME::Base64;
use Encode qw(encode);
use Business::ISBN;
__PACKAGE__->mk_ro_accessors(qw( index ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
@ -456,6 +457,33 @@ sub marc_records_to_documents {
$record_document->{$field} = sum0(grep { !ref($_) && m/\d+(\.\d+)?/} @{$record_document->{$field}});
}
}
# Expand each ISBN-typed field so the document carries every applicable form
# of each ISBN: ISBN-13 and, when one exists, ISBN-10, each both with and
# without dashes. Values that Business::ISBN cannot parse or that fail
# validation are kept unchanged.
for my $isbn_field (@{ $rules->{isbn} }) {
    my $raw_values = $record_document->{$isbn_field};
    next unless defined $raw_values;

    my @expanded;
    for my $raw (@{$raw_values}) {
        my $parsed = Business::ISBN->new($raw);

        # Unparseable or invalid input: index the original string as-is.
        unless (defined $parsed && $parsed->is_valid) {
            push @expanded, $raw;
            next;
        }

        # ISBN-13, dashed form first, then the dash-free form.
        my $dashed13 = $parsed->as_isbn13->as_string;
        (my $plain13 = $dashed13) =~ s/\-//g;
        push @expanded, $dashed13, $plain13;

        # as_isbn10 may yield a false value when no ISBN-10 form exists;
        # in that case only the ISBN-13 forms are indexed.
        my $as10 = $parsed->as_isbn10;
        next unless $as10;

        my $dashed10 = $as10->as_string;
        (my $plain10 = $dashed10) =~ s/\-//g;
        push @expanded, $dashed10, $plain10;
    }

    # Replace the field's values with the expanded list.
    $record_document->{$isbn_field} = \@expanded;
}
# TODO: Perhaps we should check whether $record_document is non-empty, although that should never really be the case
$record->encoding('UTF-8');
my @warnings;
@ -622,6 +650,7 @@ sub _get_marc_mapping_rules {
'control_fields' => {},
'data_fields' => {},
'sum' => [],
'isbn' => [],
'defaults' => {}
};
@ -632,6 +661,9 @@ sub _get_marc_mapping_rules {
if ($type eq 'sum') {
push @{$rules->{sum}}, $name;
}
elsif ($type eq 'isbn') {
push @{$rules->{isbn}}, $name;
}
elsif ($type eq 'boolean') {
# boolean gets special handling, if value doesn't exist for a field,
# it is set to false

16
t/Koha/SearchEngine/Elasticsearch.t

@ -117,7 +117,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub {
subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub {
plan tests => 45;
plan tests => 32;
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
@ -131,6 +131,15 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
marc_type => 'marc21',
marc_field => '001',
},
{
name => 'isbn',
type => 'isbn',
facet => 0,
suggestible => 0,
sort => 0,
marc_type => 'marc21',
marc_field => '020a',
},
{
name => 'author',
type => 'string',
@ -246,6 +255,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
$marc_record_1->leader(' cam 22 a 4500');
$marc_record_1->append_fields(
MARC::Field->new('001', '123'),
MARC::Field->new('020', '', '', a => '1-56619-909-3'),
MARC::Field->new('100', '', '', a => 'Author 1'),
MARC::Field->new('110', '', '', a => 'Corp Author'),
MARC::Field->new('210', '', '', a => 'Title 1'),
@ -271,7 +281,6 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
my $docs = $see->marc_records_to_documents($records);
# First record:
is(scalar @{$docs}, 2, 'Two records converted to documents');
is($docs->[0][0], '1234567', 'First document biblionumber should be set as first element in document touple');
@ -360,6 +369,9 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
'First document type_of_record_and_bib_level field should be set correctly'
);
is(scalar @{$docs->[0][1]->{isbn}}, 4, 'First document isbn field should contain four values');
is_deeply($docs->[0][1]->{isbn}, ['978-1-56619-909-4', '9781566199094', '1-56619-909-3', '1566199093'], 'First document isbn field should be set correctly');
# Second record:
is(scalar @{$docs->[1][1]->{author}}, 1, 'Second document author field should contain one value');

Loading…
Cancel
Save