Browse Source

Bug 24807: Refactor using tokenize_callbacks

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
20.11.x
David Gustafsson 4 years ago
committed by Jonathan Druart
parent
commit
342b51629d
  1. 73
      Koha/SearchEngine/Elasticsearch.pm
  2. 4
      t/db_dependent/Koha/SearchEngine/Elasticsearch.t

73
Koha/SearchEngine/Elasticsearch.pm

@ -471,37 +471,56 @@ sub _process_mappings {
# Copy (scalar) data since can have multiple targets
# with differing options for (possibly) mutating data
# so need a different copy for each
my $_data = $data;
my $data_copy = $data;
if (defined $options->{substr}) {
my ($start, $length) = @{$options->{substr}};
$_data = length($data) > $start ? substr $data, $start, $length : '';
$data_copy = length($data) > $start ? substr $data_copy, $start, $length : '';
}
# Add data to tokens array for callbacks processing
my $tokens = [$data_copy];
# Tokenize callbacks takes as token (possibly tokenized subfield data)
# as argument, and returns a possibly different list of tokens.
# Note that this list also might be empty.
if (defined $options->{tokenize_callbacks}) {
foreach my $callback (@{$options->{tokenize_callbacks}}) {
# Pass each token to current callback which returns a list
# (scalar is fine too) resulting either in a list or
# a list of lists that will be flattened by perl.
# The next callback will recieve the possibly expanded list of tokens.
$tokens = [ map { $callback->($_) } @{$tokens} ];
}
}
if (defined $options->{value_callbacks}) {
$_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}});
$tokens = [ map { reduce { $b->($a) } ($_, @{$options->{value_callbacks}}) } @{$tokens} ];
}
if (defined $options->{filter_callbacks}) {
# Skip mapping unless all filter callbacks return true
next unless all { $_data = $_->($_data) } @{$options->{filter_callbacks}};
my @tokens_filtered;
foreach my $_data (@{$tokens}) {
if ( all { $_->($_data) } @{$options->{filter_callbacks}} ) {
push @tokens_filtered, $_data;
}
}
# Overwrite $tokens with filtered values
$tokens = \@tokens_filtered;
}
# Skip mapping if all values has been removed
next unless @{$tokens};
if (defined $options->{property}) {
$_data = {
$options->{property} => $_data
}
$tokens = [ map { { $options->{property} => $_ } } @{$tokens} ];
}
if (defined $options->{nonfiling_characters_indicator}) {
my $nonfiling_chars = $meta->{field}->indicator($options->{nonfiling_characters_indicator});
$nonfiling_chars = looks_like_number($nonfiling_chars) ? int($nonfiling_chars) : 0;
if ($nonfiling_chars) {
$_data = substr $_data, $nonfiling_chars;
}
# Nonfiling chars does not make sense for multiple tokens
# Only apply on first element
$tokens->[0] = substr $tokens->[0], $nonfiling_chars;
}
$record_document->{$target} //= [];
if( ref $_data eq 'ARRAY' ){
push @{$record_document->{$target}}, @{$_data};
} else {
push @{$record_document->{$target}}, $_data;
}
push @{$record_document->{$target}}, @{$tokens};
}
}
@ -897,16 +916,20 @@ sub _field_mappings {
};
}
elsif ($target_type eq 'year') {
$default_options->{filter_callbacks} //= [];
push @{$default_options->{filter_callbacks}}, sub {
$default_options->{tokenize_callbacks} //= [];
# Only accept years containing digits and "u"
push @{$default_options->{tokenize_callbacks}}, sub {
my ($value) = @_;
my @years = ();
my @field_years = ( $value =~ /[0-9u]{4}/g );
foreach my $year (@field_years){
$year =~ s/[u]/0/g;
push @years, $year;
}
return \@years;
my @years = ( $value =~ /[0-9u]{4}/g );
return @years;
};
$default_options->{value_callbacks} //= [];
# Replace "u" with "0" for sorting
push @{$default_options->{value_callbacks}}, sub {
my ($value) = @_;
$value =~ s/[u]/0/g;
return $value;
};
}

4
t/db_dependent/Koha/SearchEngine/Elasticsearch.t

@ -371,6 +371,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
MARC::Field->new('100', '', '', a => 'Author 2'),
# MARC::Field->new('210', '', '', a => 'Title 2'),
# MARC::Field->new('245', '', '', a => 'Title: second record'),
MARC::Field->new('260', '', '', a => 'New York :', b => 'Ace ,', c => '1963-2003'),
MARC::Field->new('999', '', '', c => '1234568'),
MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric', o => $long_callno),
);
@ -531,7 +532,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
# Tests for 'year' type
is_deeply(
$docs->[1]->{'copydate'},
['1963','2003'],
['1963', '2003'],
'Second document copydate field should be set correctly'
);
is_deeply(
@ -673,7 +674,6 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents_array () t
MARC::Field->new('100', '', '', a => 'Author 2'),
# MARC::Field->new('210', '', '', a => 'Title 2'),
# MARC::Field->new('245', '', '', a => 'Title: second record'),
MARC::Field->new('260', '', '', a => 'New York :', b => 'Ace ,', c => '1963-2003'),
MARC::Field->new('999', '', '', c => '1234568'),
MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric'),
);

Loading…
Cancel
Save