Bug 22771: Respect nonfiling indicators for search fields

Strip the leading nonfiling characters from sort field values, as given
by the field's nonfiling characters indicator.

To test:

1) Apply patch
2) Run tests in t/Koha/SearchEngine/Elasticsearch.t
3) All tests should pass

Signed-off-by: David Nind <david@davidnind.com>
Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
This commit is contained in:
David Gustafsson 2020-02-26 16:14:36 +01:00 committed by Martin Renvoize
parent bc25e2c100
commit da6d7f5e33
Signed by: martin.renvoize
GPG key ID: 422B469130441A0F
2 changed files with 116 additions and 11 deletions

View file

@ -42,6 +42,7 @@ use MARC::File::XML;
use MIME::Base64;
use Encode qw(encode);
use Business::ISBN;
use Scalar::Util qw(looks_like_number);
__PACKAGE__->mk_ro_accessors(qw( index ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
@ -346,7 +347,7 @@ sub sort_fields {
return $self->_sort_fields_accessor();
}
=head2 _process_mappings($mappings, $data, $record_document, $altscript)
=head2 _process_mappings($mappings, $data, $record_document, $meta)
$self->_process_mappings($mappings, $marc_field_data, $record_document, 0)
@ -374,23 +375,42 @@ The source data from a MARC record field.
Hashref representing the Elasticsearch document on which mappings should be
applied.
=item C<$altscript>
=item C<$meta>
A boolean value indicating whether an alternate script presentation is being
A hashref containing metadata useful for enforcing per mapping rules. For
example for providing extra context for mapping options, or treating mapping
targets differently depending on type (sort, search, facet etc). Combining
this metadata with the mapping options allows us to mutate the
data per mapping, or even replace it with other data retrieved from the
metadata context.
Current properties are:
C<altscript>: A boolean value indicating whether an alternate script presentation is being
processed.
C<data_source>: The source of the C<$data> argument. Possible values are: 'leader', 'control_field',
'subfield' or 'subfields_group'.
C<code>: The code of the subfield from which C<$data> was retrieved, if C<data_source> is 'subfield'.
C<codes>: Subfield codes of the subfields group from which C<$data> was retrieved, if C<data_source>
is 'subfields_group'.
C<field>: The original C<MARC::Field> object from which C<$data> was retrieved. Not set when C<data_source> is 'leader'.
=back
=cut
sub _process_mappings {
my ($_self, $mappings, $data, $record_document, $altscript) = @_;
my ($_self, $mappings, $data, $record_document, $meta) = @_;
foreach my $mapping (@{$mappings}) {
my ($target, $options) = @{$mapping};
# Don't process sort fields for alternate scripts
my $sort = $target =~ /__sort$/;
if ($sort && $altscript) {
if ($sort && $meta->{altscript}) {
next;
}
@ -411,6 +431,13 @@ sub _process_mappings {
$options->{property} => $_data
}
}
if (defined $options->{nonfiling_characters_indicator}) {
my $nonfiling_chars = $meta->{field}->indicator($options->{nonfiling_characters_indicator});
$nonfiling_chars = looks_like_number($nonfiling_chars) ? int($nonfiling_chars) : 0;
if ($nonfiling_chars) {
$_data = substr $_data, $nonfiling_chars;
}
}
push @{$record_document->{$target}}, $_data;
}
}
@ -448,13 +475,22 @@ sub marc_records_to_documents {
my $record_document = {};
my $mappings = $rules->{leader};
if ($mappings) {
$self->_process_mappings($mappings, $record->leader(), $record_document, 0);
$self->_process_mappings($mappings, $record->leader(), $record_document, {
altscript => 0,
data_source => 'leader'
}
);
}
foreach my $field ($record->fields()) {
if ($field->is_control_field()) {
my $mappings = $control_fields_rules->{$field->tag()};
if ($mappings) {
$self->_process_mappings($mappings, $field->data(), $record_document, 0);
$self->_process_mappings($mappings, $field->data(), $record_document, {
altscript => 0,
data_source => 'control_field',
field => $field
}
);
}
}
else {
@ -480,7 +516,13 @@ sub marc_records_to_documents {
$mappings = [@{$mappings}, @{$wildcard_mappings}];
}
if (@{$mappings}) {
$self->_process_mappings($mappings, $data, $record_document, $altscript);
$self->_process_mappings($mappings, $data, $record_document, {
altscript => $altscript,
data_source => 'subfield',
code => $code,
field => $field
}
);
}
if ( defined @{$mappings}[0] && grep /match-heading/, @{@{$mappings}[0]} ){
# Used by the authority linker the match-heading field requires a specific syntax
@ -503,7 +545,13 @@ sub marc_records_to_documents {
)
);
if ($data) {
$self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript);
$self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, {
altscript => $altscript,
data_source => 'subfields_group',
codes => $subfields_group,
field => $field
}
);
}
if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){
# Used by the authority linker the match-heading field requires a specific syntax
@ -799,7 +847,9 @@ sub _field_mappings {
push @{$mapping}, {%{$default_options}, property => 'input'};
}
else {
push @{$mapping}, $default_options;
# Important! Make shallow clone, or we end up with the same hashref
# shared by all mappings
push @{$mapping}, {%{$default_options}};
}
push @mappings, $mapping;
}
@ -934,6 +984,38 @@ sub _get_marc_mapping_rules {
);
}
});
# Marc-flavour specific rule tweaks, could/should also provide hook for this
if ($marcflavour eq 'marc21') {
# Nonfiling characters processing for sort fields
my %title_fields;
if ($self->index eq $Koha::SearchEngine::BIBLIOS_INDEX) {
# Format is: nonfiling characters indicator => field names list
%title_fields = (
1 => [630, 730, 740],
2 => [130, 222, 240, 242, 243, 440, 830]
);
}
elsif ($self->index eq $Koha::SearchEngine::AUTHORITIES_INDEX) {
%title_fields = (
1 => [730],
2 => [130, 430, 530]
);
}
foreach my $indicator (keys %title_fields) {
foreach my $field_tag (@{$title_fields{$indicator}}) {
my $mappings = $rules->{data_fields}->{$field_tag}->{subfields}->{a} // [];
foreach my $mapping (@{$mappings}) {
if ($mapping->[0] =~ /__sort$/) {
# Mark this as to be processed for nonfiling characters indicator
# later on in _process_mappings
$mapping->[1]->{nonfiling_characters_indicator} = $indicator;
}
}
}
}
}
return $rules;
}

View file

@ -118,7 +118,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub {
subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub {
plan tests => 51;
plan tests => 53;
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709');
@ -194,6 +194,16 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
marc_type => 'marc21',
marc_field => '220',
},
{
name => 'uniform_title',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 1,
marc_type => 'marc21',
marc_field => '240a',
},
{
name => 'title_wildcard',
type => 'string',
@ -309,6 +319,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
MARC::Field->new('100', '', '', a => 'Author 1'),
MARC::Field->new('110', '', '', a => 'Corp Author'),
MARC::Field->new('210', '', '', a => 'Title 1'),
MARC::Field->new('240', '', '4', a => 'The uniform title with nonfiling indicator'),
MARC::Field->new('245', '', '', a => 'Title:', b => 'first record'),
MARC::Field->new('999', '', '', c => '1234567'),
# ' ' for testing trimming of white space in boolean value callback:
@ -433,6 +444,18 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
'First document local_classification field should be set correctly'
);
# Nonfiling characters for sort fields
is_deeply(
$docs->[0]->{uniform_title},
['The uniform title with nonfiling indicator'],
'First document uniform_title field should contain the title verbatim'
);
is_deeply(
$docs->[0]->{uniform_title__sort},
['uniform title with nonfiling indicator'],
'First document uniform_title__sort field should contain the title with the first four initial characters removed'
);
# Second record:
is(scalar @{$docs->[1]->{author}}, 1, 'Second document author field should contain one value');