From aece4f62ea6c37b0dafa9584038571d0a2d49e43 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Mon, 2 Jul 2018 16:04:05 +0300 Subject: [PATCH] Bug 20244: Add alt script indexing and fix sort field indexing https://bugs.koha-community.org/show_bug.cgi?id=20244 Test plan: 1. Add a record with alternate script fields in 880 (sample attached in the bug). 2. Make sure the record can be found e.g. with the alternate script title. 3. Add a record with multiple authors. 4. Check that in the index there is only a single author__sort field. Signed-off-by: Brendan Gallagher Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens --- Koha/SearchEngine/Elasticsearch.pm | 48 +++++++++++++++++++++++------ t/Koha/SearchEngine/Elasticsearch.t | 10 +++--- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 64ea156625..77580a715f 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -310,9 +310,9 @@ sub sort_fields { return $self->_sort_fields_accessor(); } -=head2 _process_mappings($mappings, $data, $record_document) +=head2 _process_mappings($mappings, $data, $record_document, $altscript) - $self->_process_mappings($mappings, $marc_field_data, $record_document) + $self->_process_mappings($mappings, $marc_field_data, $record_document, 0) Process all C<$mappings> targets operating on a specific MARC field C<$data>. Since we group all mappings by MARC field targets C<$mappings> will contain @@ -338,14 +338,26 @@ The source data from a MARC record field. Hashref representing the Elasticsearch document on which mappings should be applied. +=item C<$altscript> + +A boolean value indicating whether an alternate script presentation is being +processed.
+ =back =cut sub _process_mappings { - my ($_self, $mappings, $data, $record_document) = @_; + my ($_self, $mappings, $data, $record_document, $altscript) = @_; foreach my $mapping (@{$mappings}) { my ($target, $options) = @{$mapping}; + + # Don't process sort fields for alternate scripts + my $sort = $target =~ /__sort$/; + if ($sort && $altscript) { + next; + } + # Copy (scalar) data since can have multiple targets # with differing options for (possibly) mutating data # so need a different copy for each @@ -363,7 +375,12 @@ sub _process_mappings { $options->{property} => $_data } } - push @{$record_document->{$target}}, $_data; + # For sort fields, index only a single field with concatenated values + if ($sort && @{$record_document->{$target}}) { + @{$record_document->{$target}}[0] .= " $_data"; + } else { + push @{$record_document->{$target}}, $_data; + } } } @@ -399,17 +416,28 @@ sub marc_records_to_documents { my $record_document = {}; my $mappings = $rules->{leader}; if ($mappings) { - $self->_process_mappings($mappings, $record->leader(), $record_document); + $self->_process_mappings($mappings, $record->leader(), $record_document, 0); } foreach my $field ($record->fields()) { - if($field->is_control_field()) { + if ($field->is_control_field()) { my $mappings = $control_fields_rules->{$field->tag()}; if ($mappings) { - $self->_process_mappings($mappings, $field->data(), $record_document); + $self->_process_mappings($mappings, $field->data(), $record_document, 0); } } else { - my $data_field_rules = $data_fields_rules->{$field->tag()}; + my $tag = $field->tag(); + # Handle alternate scripts in MARC 21 + my $altscript = 0; + if ($marcflavour eq 'marc21' && $tag eq '880') { + my $sub6 = $field->subfield('6'); + if ($sub6 =~ /^(...)-\d+/) { + $tag = $1; + $altscript = 1; + } + } + + my $data_field_rules = $data_fields_rules->{$tag}; if ($data_field_rules) { my $subfields_mappings = $data_field_rules->{subfields}; @@ -421,7 +449,7 @@ sub marc_records_to_documents 
{ $mappings = [@{$mappings}, @{$wildcard_mappings}]; } if (@{$mappings}) { - $self->_process_mappings($mappings, $data, $record_document); + $self->_process_mappings($mappings, $data, $record_document, $altscript); } } @@ -437,7 +465,7 @@ sub marc_records_to_documents { ) ); if ($data) { - $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document); + $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript); } } } diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index 933fdfb644..a4fc6b561b 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -117,7 +117,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub { subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub { - plan tests => 32; + plan tests => 47; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); @@ -290,11 +290,11 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' is(scalar @{$docs->[0][1]->{author}}, 2, 'First document author field should contain two values'); is_deeply($docs->[0][1]->{author}, ['Author 1', 'Corp Author'], 'First document author field should be set correctly'); - is(scalar @{$docs->[0][1]->{author__sort}}, 2, 'First document author__sort field should have two values'); - is_deeply($docs->[0][1]->{author__sort}, ['Author 1', 'Corp Author'], 'First document author__sort field should be set correctly'); + is(scalar @{$docs->[0][1]->{author__sort}}, 1, 'First document author__sort field should have a single value'); + is_deeply($docs->[0][1]->{author__sort}, ['Author 1 Corp Author'], 'First document author__sort field should be set correctly'); - is(scalar @{$docs->[0][1]->{title__sort}}, 3, 'First document title__sort field should have three values'); - is_deeply($docs->[0][1]->{title__sort}, ['Title:', 'first record', 'Title: first record'], 'First 
document title__sort field should be set correctly'); + is(scalar @{$docs->[0][1]->{title__sort}}, 1, 'First document title__sort field should have a single'); + is_deeply($docs->[0][1]->{title__sort}, ['Title: first record Title: first record'], 'First document title__sort field should be set correctly'); is(scalar @{$docs->[0][1]->{title_wildcard}}, 2, 'First document title_wildcard field should have two values'); is_deeply($docs->[0][1]->{title_wildcard}, ['Title:', 'first record'], 'First document title_wildcard field should be set correctly'); -- 2.39.5