From 5a0f2821a2ff104781c1ff05a80957720ae46c43 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Mon, 2 Jul 2018 15:55:10 +0300 Subject: [PATCH] Bug 20244: Improve Elasticsearch ISBN indexing https://bugs.koha-community.org/show_bug.cgi?id=20244 Test plan: 1. Add a record with an ISBN-10 or ISBN-13 that can be converted to ISBN-10 (e.g. 1-56619-909-3). 2. Verify that the record can be found by searching for "1-56619-909-3", "1566199093", "978-1-56619-909-4" and "9781566199094". Signed-off-by: Brendan Gallagher Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens --- Koha/SearchEngine/Elasticsearch.pm | 32 +++++++++++++++++++++++++++++ t/Koha/SearchEngine/Elasticsearch.t | 16 +++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 0e17b93804..64ea156625 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -38,6 +38,7 @@ use List::Util qw( sum0 reduce ); use MARC::File::XML; use MIME::Base64; use Encode qw(encode); +use Business::ISBN; __PACKAGE__->mk_ro_accessors(qw( index )); __PACKAGE__->mk_accessors(qw( sort_fields )); @@ -456,6 +457,33 @@ sub marc_records_to_documents { $record_document->{$field} = sum0(grep { !ref($_) && m/\d+(\.\d+)?/} @{$record_document->{$field}}); } } + # Index all applicable ISBN forms (ISBN-10 and ISBN-13 with and without dashes) + foreach my $field (@{$rules->{isbn}}) { + if (defined $record_document->{$field}) { + my @isbns = (); + foreach my $input_isbn (@{$record_document->{$field}}) { + my $isbn = Business::ISBN->new($input_isbn); + if (defined $isbn && $isbn->is_valid) { + my $isbn13 = $isbn->as_isbn13->as_string; + push @isbns, $isbn13; + $isbn13 =~ s/\-//g; + push @isbns, $isbn13; + + my $isbn10 = $isbn->as_isbn10; + if ($isbn10) { + $isbn10 = $isbn10->as_string; + push @isbns, $isbn10; + $isbn10 =~ s/\-//g; + push @isbns, $isbn10; + } + } else { + push @isbns, $input_isbn; + } + } + $record_document->{$field} = \@isbns; + } + } + # TODO: Perhaps should check if $records_document non empty, but really should never be the case $record->encoding('UTF-8'); my @warnings; @@ -622,6 +650,7 @@ sub _get_marc_mapping_rules { 'control_fields' => {}, 'data_fields' => {}, 'sum' => [], + 'isbn' => [], 'defaults' => {} }; @@ -632,6 +661,9 @@ sub _get_marc_mapping_rules { if ($type eq 'sum') { push @{$rules->{sum}}, $name; } + elsif ($type eq 'isbn') { + push @{$rules->{isbn}}, $name; + } elsif ($type eq 'boolean') { # boolean gets special handling, if value doesn't exist for a field, # it is set to false diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index 201614b9af..933fdfb644 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -117,7 +117,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub { subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub { - plan tests => 45; + plan tests => 32; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); @@ -131,6 +131,15 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' marc_type => 'marc21', marc_field => '001', }, + { + name => 'isbn', + type => 'isbn', + facet => 0, + suggestible => 0, + sort => 0, + marc_type => 'marc21', + marc_field => '020a', + }, { name => 'author', type => 'string', @@ -246,6 +255,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' $marc_record_1->leader(' cam 22 a 4500'); $marc_record_1->append_fields( MARC::Field->new('001', '123'), + MARC::Field->new('020', '', '', a => '1-56619-909-3'), MARC::Field->new('100', '', '', a => 'Author 1'), MARC::Field->new('110', '', '', a => 'Corp Author'), MARC::Field->new('210', '', '', a => 'Title 1'), @@ -271,7 +281,6 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' my $docs = $see->marc_records_to_documents($records); # First record: - is(scalar @{$docs}, 2, 'Two records converted to documents'); is($docs->[0][0], '1234567', 'First document biblionumber should be set as first element in document touple'); @@ -360,6 +369,9 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' 'First document type_of_record_and_bib_level field should be set correctly' ); + is(scalar @{$docs->[0][1]->{isbn}}, 4, 'First document isbn field should contain four values'); + is_deeply($docs->[0][1]->{isbn}, ['978-1-56619-909-4', '9781566199094', '1-56619-909-3', '1566199093'], 'First document isbn field should be set correctly'); + # Second record: is(scalar @{$docs->[1][1]->{author}}, 1, 'Second document author field should contain one value'); -- 2.39.5