From da6d7f5e3345bc64bc40dd5e6cf9df8351fdb5e4 Mon Sep 17 00:00:00 2001 From: David Gustafsson Date: Wed, 26 Feb 2020 16:14:36 +0100 Subject: [PATCH] Bug 22771: Respect nonfiling indicators for search fields Strip initial characters from search fields in accordance with nonfiling character indicators. To test: 1) Apply patch 2) Run tests in t/Koha/SearchEngine/Elasticsearch.t 3) All tests should pass Signed-off-by: David Nind Signed-off-by: Nick Clemens Signed-off-by: Martin Renvoize --- Koha/SearchEngine/Elasticsearch.pm | 102 +++++++++++++++++++++++++--- t/Koha/SearchEngine/Elasticsearch.t | 25 ++++++- 2 files changed, 116 insertions(+), 11 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 85574cefc0..cb34c2aacd 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -42,6 +42,7 @@ use MARC::File::XML; use MIME::Base64; use Encode qw(encode); use Business::ISBN; +use Scalar::Util qw(looks_like_number); __PACKAGE__->mk_ro_accessors(qw( index )); __PACKAGE__->mk_accessors(qw( sort_fields )); @@ -346,7 +347,7 @@ sub sort_fields { return $self->_sort_fields_accessor(); } -=head2 _process_mappings($mappings, $data, $record_document, $altscript) +=head2 _process_mappings($mappings, $data, $record_document, $meta) $self->_process_mappings($mappings, $marc_field_data, $record_document, 0) @@ -374,23 +375,42 @@ The source data from a MARC record field. Hashref representing the Elasticsearch document on which mappings should be applied. -=item C<$altscript> +=item C<$meta> -A boolean value indicating whether an alternate script presentation is being +A hashref containing metadata useful for enforcing per mapping rules. For +example for providing extra context for mapping options, or treating mapping +targets differently depending on type (sort, search, facet etc). Combining +this metadata with the mapping options and metadata allows us to mutate the +data per mapping, or even replace it with other data retrieved from the +metadata context. + +Current properties are: + +C: A boolean value indicating whether an alternate script presentation is being processed. +C: The source of the $ argument. Possible values are: 'leader', 'control_field', +'subfield' or 'subfields_group'. + +C: The code of the subfield C<$data> was retrieved, if C is 'subfield'. + +C: Subfield codes of the subfields group from which C<$data> was retrieved, if C +is 'subfields_group'. + +C: The original C object. + =back =cut sub _process_mappings { - my ($_self, $mappings, $data, $record_document, $altscript) = @_; + my ($_self, $mappings, $data, $record_document, $meta) = @_; foreach my $mapping (@{$mappings}) { my ($target, $options) = @{$mapping}; # Don't process sort fields for alternate scripts my $sort = $target =~ /__sort$/; - if ($sort && $altscript) { + if ($sort && $meta->{altscript}) { next; } @@ -411,6 +431,13 @@ sub _process_mappings { $options->{property} => $_data } } + if (defined $options->{nonfiling_characters_indicator}) { + my $nonfiling_chars = $meta->{field}->indicator($options->{nonfiling_characters_indicator}); + $nonfiling_chars = looks_like_number($nonfiling_chars) ? int($nonfiling_chars) : 0; + if ($nonfiling_chars) { + $_data = substr $_data, $nonfiling_chars; + } + } push @{$record_document->{$target}}, $_data; } } @@ -448,13 +475,22 @@ sub marc_records_to_documents { my $record_document = {}; my $mappings = $rules->{leader}; if ($mappings) { - $self->_process_mappings($mappings, $record->leader(), $record_document, 0); + $self->_process_mappings($mappings, $record->leader(), $record_document, { + altscript => 0, + data_source => 'leader' + } + ); } foreach my $field ($record->fields()) { if ($field->is_control_field()) { my $mappings = $control_fields_rules->{$field->tag()}; if ($mappings) { - $self->_process_mappings($mappings, $field->data(), $record_document, 0); + $self->_process_mappings($mappings, $field->data(), $record_document, { + altscript => 0, + data_source => 'control_field', + field => $field + } + ); } } else { @@ -480,7 +516,13 @@ sub marc_records_to_documents { $mappings = [@{$mappings}, @{$wildcard_mappings}]; } if (@{$mappings}) { - $self->_process_mappings($mappings, $data, $record_document, $altscript); + $self->_process_mappings($mappings, $data, $record_document, { + altscript => $altscript, + data_source => 'subfield', + code => $code, + field => $field + } + ); } if ( defined @{$mappings}[0] && grep /match-heading/, @{@{$mappings}[0]} ){ # Used by the authority linker the match-heading field requires a specific syntax @@ -503,7 +545,13 @@ sub marc_records_to_documents { ) ); if ($data) { - $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript); + $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, { + altscript => $altscript, + data_source => 'subfields_group', + codes => $subfields_group, + field => $field + } + ); } if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){ # Used by the authority linker the match-heading field requires a specific syntax @@ -799,7 +847,9 @@ sub _field_mappings { push @{$mapping}, {%{$default_options}, property => 'input'}; } else { - push @{$mapping}, $default_options; + # Important! Make shallow clone, or we end up with the same hashref + # shared by all mappings + push @{$mapping}, {%{$default_options}}; } push @mappings, $mapping; } @@ -934,6 +984,38 @@ sub _get_marc_mapping_rules { ); } }); + + # Marc-flavour specific rule tweaks, could/should also provide hook for this + if ($marcflavour eq 'marc21') { + # Nonfiling characters processing for sort fields + my %title_fields; + if ($self->index eq $Koha::SearchEngine::BIBLIOS_INDEX) { + # Format is: nonfiling characters indicator => field names list + %title_fields = ( + 1 => [630, 730, 740], + 2 => [130, 222, 240, 242, 243, 440, 830] + ); + } + elsif ($self->index eq $Koha::SearchEngine::AUTHORITIES_INDEX) { + %title_fields = ( + 1 => [730], + 2 => [130, 430, 530] + ); + } + foreach my $indicator (keys %title_fields) { + foreach my $field_tag (@{$title_fields{$indicator}}) { + my $mappings = $rules->{data_fields}->{$field_tag}->{subfields}->{a} // []; + foreach my $mapping (@{$mappings}) { + if ($mapping->[0] =~ /__sort$/) { + # Mark this as to be processed for nonfiling characters indicator + # later on in _process_mappings + $mapping->[1]->{nonfiling_characters_indicator} = $indicator; + } + } + } + } + } + return $rules; } diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index 0d76dd17d9..1bbc48be85 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -118,7 +118,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub { subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub { - plan tests => 51; + plan tests => 53; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709'); @@ -194,6 +194,16 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' marc_type => 'marc21', marc_field => '220', }, + { + name => 'uniform_title', + type => 'string', + facet => 0, + suggestible => 0, + searchable => 1, + sort => 1, + marc_type => 'marc21', + marc_field => '240a', + }, { name => 'title_wildcard', type => 'string', @@ -309,6 +319,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' MARC::Field->new('100', '', '', a => 'Author 1'), MARC::Field->new('110', '', '', a => 'Corp Author'), MARC::Field->new('210', '', '', a => 'Title 1'), + MARC::Field->new('240', '', '4', a => 'The uniform title with nonfiling indicator'), MARC::Field->new('245', '', '', a => 'Title:', b => 'first record'), MARC::Field->new('999', '', '', c => '1234567'), # ' ' for testing trimming of white space in boolean value callback: @@ -433,6 +444,18 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' 'First document local_classification field should be set correctly' ); + # Nonfiling characters for sort fields + is_deeply( + $docs->[0]->{uniform_title}, + ['The uniform title with nonfiling indicator'], + 'First document uniform_title field should contain the title verbatim' + ); + is_deeply( + $docs->[0]->{uniform_title__sort}, + ['uniform title with nonfiling indicator'], + 'First document uniform_title__sort field should contain the title with the first four initial characters removed' + ); + # Second record: is(scalar @{$docs->[1]->{author}}, 1, 'Second document author field should contain one value'); -- 2.39.5