Parcourir la source

Bug 24807: Add "year" type to improve sorting behaviour

Add a "year" search field type. Fields with this type will only
retain values that looks like years, so invalid values such as
whitespace or word characters will not be indexed.
This for instance improves the behaviour when sorting by
"date-of-publication". If all values are indexed, records with
junk data instead of valid years will appear first among the search
results, drowning out more relevant hits. If assigning this field
the "year" type these records will instead always appear last,
regarless of sort order.

To test:

1) Have at least two biblios, one with a valid year in 008 (pos 7-10)
and another with an invalid one ("uuuu" for example)
2) Perform a wildcard search (*) and sort results by publication date.
3) The record with invalid year of pulication in 008 should appear first
4) Apply patch and run database updates
5) Reindex ElasticSearch
6) Perform the same search as in 2)
7) The record with the invalid year should now appear last

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
20.11.x
David Gustafsson il y a 1 an
committed by Jonathan Druart
Parent
révision
d1c8d7ecce
  1. 18
      Koha/SearchEngine/Elasticsearch.pm
  2. 3
      Koha/SearchEngine/Elasticsearch/Indexer.pm
  3. 2
      admin/searchengine/elasticsearch/field_config.yaml
  4. 5
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt
  5. 22
      t/db_dependent/Koha/SearchEngine/Elasticsearch.t

18
Koha/SearchEngine/Elasticsearch.pm

@ -39,7 +39,7 @@ use Search::Elasticsearch;
use Try::Tiny;
use YAML::Syck;
use List::Util qw( sum0 reduce );
use List::Util qw( sum0 reduce all );
use MARC::File::XML;
use MIME::Base64;
use Encode qw(encode);
@ -209,6 +209,8 @@ sub get_elasticsearch_mappings {
$es_type = 'integer';
} elsif ($type eq 'isbn' || $type eq 'stdno') {
$es_type = 'stdno';
} elsif ($type eq 'year') {
$es_type = 'year';
}
if ($search) {
@ -470,7 +472,6 @@ sub _process_mappings {
# with differing options for (possibly) mutating data
# so need a different copy for each
my $_data = $data;
$record_document->{$target} //= [];
if (defined $options->{substr}) {
my ($start, $length) = @{$options->{substr}};
$_data = length($data) > $start ? substr $data, $start, $length : '';
@ -478,6 +479,10 @@ sub _process_mappings {
if (defined $options->{value_callbacks}) {
$_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}});
}
if (defined $options->{filter_callbacks}) {
# Skip mapping unless all filter callbacks return true
next unless all { $_->($_data) } @{$options->{filter_callbacks}};
}
if (defined $options->{property}) {
$_data = {
$options->{property} => $_data
@ -490,6 +495,8 @@ sub _process_mappings {
$_data = substr $_data, $nonfiling_chars;
}
}
$record_document->{$target} //= [];
push @{$record_document->{$target}}, $_data;
}
}
@ -885,6 +892,13 @@ sub _field_mappings {
return $value ? 'true' : 'false';
};
}
elsif ($target_type eq 'year') {
$default_options->{filter_callbacks} //= [];
push @{$default_options->{filter_callbacks}}, sub {
my ($value) = @_;
return $value =~ /^\d+$/;
};
}
if ($search) {
my $mapping = [$target_name, $default_options];

3
Koha/SearchEngine/Elasticsearch/Indexer.pm

@ -117,6 +117,9 @@ sub update_index {
type => 'data', # is just hard coded in Indexer.pm?
body => \@body
);
if ($response->{errors}) {
carp "One or more ElasticSearch errors occured when indexing documents";
}
}
return $response;
}

2
admin/searchengine/elasticsearch/field_config.yaml

@ -25,6 +25,8 @@ search:
type: integer
null_value: 0
ignore_malformed: true
year:
type: short
stdno:
type: text
analyzer: analyzer_stdno

5
koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt

@ -196,6 +196,11 @@ a.add, a.delete {
[% ELSE %]
<option value="date">Date</option>
[% END %]
[% IF search_field.type == "year" %]
<option value="year" selected="selected">Year</option>
[% ELSE %]
<option value="year">Year</option>
[% END %]
[% IF search_field.type == "number" %]
<option value="number" selected="selected">Number</option>
[% ELSE %]

22
t/db_dependent/Koha/SearchEngine/Elasticsearch.t

@ -132,7 +132,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub {
subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub {
plan tests => 53;
plan tests => 56;
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709');
@ -298,6 +298,17 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
marc_type => 'marc21',
marc_field => '952l',
},
{
name => 'date-of-publication',
type => 'year',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 1,
marc_type => 'marc21',
marc_field => '008_/7-10',
},
);
my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch');
@ -329,6 +340,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
$marc_record_1->append_fields(
MARC::Field->new('001', '123'),
MARC::Field->new('007', 'ku'),
MARC::Field->new('008', '901111s1962 xxk|||| |00| ||eng c'),
MARC::Field->new('020', '', '', a => '1-56619-909-3'),
MARC::Field->new('100', '', '', a => 'Author 1'),
MARC::Field->new('110', '', '', a => 'Corp Author'),
@ -344,6 +356,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
my $marc_record_2 = MARC::Record->new();
$marc_record_2->leader(' cam 22 a 4500');
$marc_record_2->append_fields(
MARC::Field->new('008', '901111s19uu xxk|||| |00| ||eng c'),
MARC::Field->new('100', '', '', a => 'Author 2'),
# MARC::Field->new('210', '', '', a => 'Title 2'),
# MARC::Field->new('245', '', '', a => 'Title: second record'),
@ -470,6 +483,10 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
'First document uniform_title__sort field should contain the title with the first four initial characters removed'
);
# Tests for 'year' type and 'filter_callbacks'
is(scalar @{$docs->[0]->{'date-of-publication'}}, 1, 'First document date-of-publication field should contain one value');
is_deeply($docs->[0]->{'date-of-publication'}, ['1962'], 'First document date-of-publication field should be set correctly');
# Second record:
is(scalar @{$docs->[1]->{author}}, 1, 'Second document author field should contain one value');
@ -494,6 +511,9 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
'Second document local_classification__sort field should be set correctly'
);
# Tests for 'year' type and 'filter_callbacks'
ok(!(defined $docs->[1]->{'date-of-publication'}), "Second document invalid date-of-publication value should have been removed");
# Mappings marc_type:
ok(!(defined $docs->[0]->{unimarc_title}), "No mapping when marc_type doesn't match marc flavour");

Chargement…
Annuler
Enregistrer