Browse Source

Bug 20073: Move Elasticsearch configs to yaml files and improve the default settings.

Improvements:
1) Index settings moved from code to etc/searchengine/elasticsearch/index_config.yaml. An alternative can be specified in koha-conf.xml.
2) Field settings moved from code to etc/searchengine/elasticsearch/field_config.yaml. An alternative can be specified in koha-conf.xml.
3) mappings.yaml has been moved from admin/searchengine/elasticsearch to etc/searchengine/elasticsearch. An alternative can be specified in koha-conf.xml.
4) Default settings have been improved to remove punctuation from phrases used for sorting etc.
5) State variables are used for storing configuration to avoid parsing it multiple times.
6) A possibility to reset the fields too has been added to the reset operation of mappings administration.
7) mappings.yaml has been moved from admin/searchengine/elasticsearch to etc/searchengine/elasticsearch.
8) An stdno field type has been added for standard identifiers.

To test:
1) Run tests in t/Koha/SearchEngine/Elasticsearch.t
2) Clear tables search_fields and search_marc_map
3) Go to admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1
4) Verify that admin/searchengine/elasticsearch/mappings.pl displays the mappings properly, including ISBN and other standard number fields.
5) Index some records using the -d parameter with misc/search_tools/rebuild_elastic_search.pl to recreate the index
6) Verify that you can find the records
7) Put <elasticsearch_index_mappings>non_existent</elasticsearch_index_mappings> to koha-conf.xml
8) Verify that admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1 fails because it can't find non_existent.
9) Copy etc/searchengine/elasticsearch/mappings.yaml to a new location and make elasticsearch_index_mappings setting in koha-conf.xml point to it.
10) Make a change in the new mappings.yaml.
11) Clear table search_fields (mappings reset doesn't do it yet, see bug 20248)
12) Go to admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1
13) Verify that the changes you made are now visible in the mappings UI

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>

Bug 20073: Move Elasticsearch yaml files back to admin directory

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
18.11.x
Ere Maijala 6 years ago
committed by Nick Clemens
parent
commit
133df5e912
  1. 201
      Koha/SearchEngine/Elasticsearch.pm
  2. 61
      admin/searchengine/elasticsearch/field_config.yaml
  3. 34
      admin/searchengine/elasticsearch/index_config.yaml
  4. 7
      admin/searchengine/elasticsearch/mappings.yaml
  5. 11
      debian/templates/koha-conf-site.xml.in
  6. 11
      etc/koha-conf.xml
  7. 8
      installer/data/mysql/atomicupdate/bug_20073.perl
  8. 2
      installer/data/mysql/kohastructure.sql
  9. 10
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt
  10. 26
      t/Koha/SearchEngine/Elasticsearch.t

201
Koha/SearchEngine/Elasticsearch.pm

@ -34,8 +34,6 @@ use Search::Elasticsearch;
use Try::Tiny;
use YAML::Syck;
use Data::Dumper; # TODO remove
__PACKAGE__->mk_ro_accessors(qw( index ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
@ -137,24 +135,14 @@ A hashref containing the settings is returned.
sub get_elasticsearch_settings {
my ($self) = @_;
# Ultimately this should come from a file or something, and not be
# hardcoded.
my $settings = {
index => {
analysis => {
analyzer => {
analyser_phrase => {
tokenizer => 'icu_tokenizer',
filter => ['icu_folding'],
},
analyser_standard => {
tokenizer => 'icu_tokenizer',
filter => ['icu_folding'],
},
},
}
}
};
# Use state to speed up repeated calls
state $settings = undef;
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_index_config');
$config_file ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/index_config.yaml';
$settings = LoadFile( $config_file );
}
return $settings;
}
@ -170,116 +158,97 @@ created.
sub get_elasticsearch_mappings {
my ($self) = @_;
# TODO cache in the object?
my $mappings = {
data => {
_all => {type => "string", analyzer => "analyser_standard"},
properties => {
record => {
store => "true",
include_in_all => JSON::false,
type => "text",
},
}
}
};
my %sort_fields;
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
# TODO be aware of date formats, but this requires pre-parsing
# as ES will simply reject anything with an invalid date.
my $es_type =
$type eq 'boolean'
? 'boolean'
: 'text';
if ($es_type eq 'boolean') {
$mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
return; #Boolean cannot have facets nor sorting nor suggestions
} else {
$mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
}
# Use state to speed up repeated calls
state %all_mappings;
state %sort_fields;
if (!defined $all_mappings{$self->index}) {
$sort_fields{$self->index} = {};
my $mappings = {
data => _get_elasticsearch_mapping('general', '')
};
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
# TODO be aware of date formats, but this requires pre-parsing
# as ES will simply reject anything with an invalid date.
my $es_type = 'text';
if ($type eq 'boolean') {
$es_type = 'boolean';
} elsif ($type eq 'number' || $type eq 'sum') {
$es_type = 'integer';
} elsif ($type eq 'isbn' || $type eq 'stdno') {
$es_type = 'stdno';
}
if ($facet) {
$mappings->{data}{properties}{ $name . '__facet' } = {
type => "keyword",
};
}
if ($suggestible) {
$mappings->{data}{properties}{ $name . '__suggestion' } = {
type => 'completion',
analyzer => 'simple',
search_analyzer => 'simple',
};
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if ($sort || !defined $sort) {
$mappings->{data}{properties}{ $name . '__sort' } = {
search_analyzer => "analyser_phrase",
analyzer => "analyser_phrase",
type => "text",
include_in_all => JSON::false,
fields => {
phrase => {
type => "keyword",
},
},
};
$sort_fields{$name} = 1;
$mappings->{data}{properties}{$name} = _get_elasticsearch_mapping('search', $es_type);
if ($facet) {
$mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_mapping('facet', $es_type);
}
if ($suggestible) {
$mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_mapping('suggestible', $es_type);
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if (!defined $sort || $sort) {
$mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_mapping('sort', $es_type);
$sort_fields{$self->index}{$name} = 1;
}
}
}
);
$self->sort_fields(\%sort_fields);
return $mappings;
);
$all_mappings{$self->index} = $mappings;
}
$self->sort_fields(\%{$sort_fields{$self->index}});
return $all_mappings{$self->index};
}
=head2 _elasticsearch_mapping_for_*
=head2 _get_elasticsearch_mapping
Get the ES mappings for the given data type or a special mapping case
Get the ES mappings for the given purpose and data type
Receives the same parameters from the $self->_foreach_mapping() dispatcher
$mapping = _get_elasticsearch_mapping('search', 'text');
=cut
sub _elasticsearch_mapping_for_boolean {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
sub _get_elasticsearch_mapping {
return {
type => $type,
null_value => 0,
};
}
my ( $purpose, $type ) = @_;
sub _elasticsearch_mapping_for_default {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return {
search_analyzer => "analyser_standard",
analyzer => "analyser_standard",
type => $type,
fields => {
phrase => {
search_analyzer => "analyser_phrase",
analyzer => "analyser_phrase",
type => "text",
},
raw => {
type => "keyword",
}
},
};
# Use state to speed up repeated calls
state $settings = undef;
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_field_config');
$config_file ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/field_config.yaml';
$settings = LoadFile( $config_file );
}
if (!defined $settings->{$purpose}) {
die "Field purpose $purpose not defined in field config";
}
if ($type eq '') {
return $settings->{$purpose};
}
if (defined $settings->{$purpose}{$type}) {
return $settings->{$purpose}{$type};
}
if (defined $settings->{$purpose}{'default'}) {
return $settings->{$purpose}{'default'};
}
return undef;
}
sub reset_elasticsearch_mappings {
my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
my ( $reset_fields ) = @_;
my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings');
$mappings_yaml ||= C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
my $indexes = LoadFile( $mappings_yaml );
while ( my ( $index_name, $fields ) = each %$indexes ) {

61
admin/searchengine/elasticsearch/field_config.yaml

@ -0,0 +1,61 @@
---
# General field configuration
general:
_all:
type: string
analyzer: analyser_standard
properties:
record:
store: true
type: text
# Search fields
search:
boolean:
type: boolean
null_value: false
integer:
type: integer
null_value: 0
stdno:
type: text
analyzer: analyser_stdno
search_analyzer: analyser_stdno
fields:
phrase:
type: text
analyzer: analyser_stdno
search_analyzer: analyser_stdno
raw:
type: keyword
copy_to: _all
default:
type: text
analyzer: analyser_standard
search_analyzer: analyser_standard
fields:
phrase:
type: text
analyzer: analyser_phrase
search_analyzer: analyser_phrase
raw:
type: keyword
copy_to: _all
# Facets
facet:
default:
type: keyword
# Suggestible
suggestible:
default:
type: completion
analyzer: simple
search_analyzer: simple
# Sort
sort:
default:
type: text
analyzer: analyser_phrase
search_analyzer: analyser_phrase
fields:
phrase:
type: keyword

34
admin/searchengine/elasticsearch/index_config.yaml

@ -0,0 +1,34 @@
---
# Index configuration that defines how different analyzers work.
index:
analysis:
analyzer:
# Phrase analyzer is used for phrases (phrase match, sorting)
analyser_phrase:
tokenizer: keyword
filter:
- icu_folding
char_filter:
- punctuation
analyser_standard:
tokenizer: icu_tokenizer
filter:
- icu_folding
analyser_stdno:
tokenizer: whitespace
filter:
- icu_folding
char_filter:
- punctuation
normalizer:
normalizer_keyword:
type: custom
filter:
- icu_folding
char_filter:
# The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer.
punctuation:
type: pattern_replace
# The pattern contains all ASCII punctuation characters.
pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])'
replacement: ''

7
admin/searchengine/elasticsearch/mappings.yaml

@ -1,4 +1,5 @@
---
# Basic mappings from MARC fields to Elasticsearch fields.
authorities:
Corporate-name-see-also-from:
label: Corporate-name-see-also-from
@ -2162,7 +2163,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: ''
type: 'stdno'
isbn:
label: isbn
mappings:
@ -2181,7 +2182,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: ''
type: 'isbn'
issn:
label: issn
mappings:
@ -2200,7 +2201,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: ''
type: 'stdno'
issues:
label: issues
mappings:

11
debian/templates/koha-conf-site.xml.in

@ -328,10 +328,21 @@ __END_SRU_PUBLICSERVER__
<plack_max_requests>50</plack_max_requests>
<plack_workers>2</plack_workers>
<!-- Elasticsearch Configuration -->
<elasticsearch>
<server>localhost:9200</server>
<index_name>koha___KOHASITE__</index_name>
</elasticsearch>
<!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
<!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
<!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
Note that any changes made to the mappings file only take effect if you reset the mappings in
by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
Resetting mappings will override any changes made in the Search engine configuration UI.
-->
<!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
<interlibrary_loans>
<!-- Path to where Illbackends are located on the system

11
etc/koha-conf.xml

@ -159,10 +159,21 @@ __PAZPAR2_TOGGLE_XML_POST__
<plack_max_requests>50</plack_max_requests>
<plack_workers>2</plack_workers>
<!-- Elasticsearch Configuration -->
<elasticsearch>
<server>localhost:9200</server>
<index_name>koha___DB_NAME__</index_name>
</elasticsearch>
<!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
<!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
<!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
Note that any changes made to the mappings file only take effect if you reset the mappings in
by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
Resetting mappings will override any changes made in the Search engine configuration UI.
-->
<!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
<interlibrary_loans>
<!-- Path to where Illbackends are located on the system

8
installer/data/mysql/atomicupdate/bug_20073.perl

@ -0,0 +1,8 @@
$DBversion = 'XXX'; # will be replaced by the RM
if( CheckVersion( $DBversion ) ) {
$dbh->do( "ALTER TABLE search_field CHANGE COLUMN type type ENUM('', 'string', 'date', 'number', 'boolean', 'sum', 'isbn', 'stdno') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine'" );
# Always end with this (adjust the bug info)
SetVersion( $DBversion );
print "Upgrade to $DBversion done (Bug 20073 - Add new types for Elasticsearch fields)\n";
}

2
installer/data/mysql/kohastructure.sql

@ -1459,7 +1459,7 @@ CREATE TABLE `search_field` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine',
`label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display',
`type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
`type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum', 'isbn', 'stdno') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
PRIMARY KEY (`id`),
UNIQUE KEY (`name` (191))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

10
koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt

@ -164,6 +164,16 @@ a.add, a.delete {
[% ELSE %]
<option value="sum">Sum</option>
[% END %]
[% IF search_field.type == "isbn" %]
<option value="isbn" selected="selected">ISBN</option>
[% ELSE %]
<option value="isbn">ISBN</option>
[% END %]
[% IF search_field.type == "stdno" %]
<option value="stdno" selected="selected">Std. Number</option>
[% ELSE %]
<option value="stdno">Std. Number</option>
[% END %]
</select>
</td>
</tr>

26
t/Koha/SearchEngine/Elasticsearch.t

@ -17,7 +17,7 @@
use Modern::Perl;
use Test::More tests => 1;
use Test::More tests => 3;
use Test::Exception;
use t::lib::Mocks;
@ -84,3 +84,27 @@ subtest '_read_configuration() tests' => sub {
is( $configuration->{index_name}, 'index', 'Index configuration parsed correctly' );
is_deeply( $configuration->{nodes}, \@servers , 'Server configuration parsed correctly' );
};
subtest 'get_elasticsearch_settings() tests' => sub {
plan tests => 1;
my $settings;
# test reading index settings
my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
$settings = $es->get_elasticsearch_settings();
is( $settings->{index}{analysis}{analyzer}{analyser_phrase}{tokenizer}, 'keyword', 'Index settings parsed correctly' );
};
subtest 'get_elasticsearch_mappings() tests' => sub {
plan tests => 1;
my $mappings;
# test reading mappings
my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
$mappings = $es->get_elasticsearch_mappings();
is( $mappings->{data}{_all}{type}, 'string', 'Field mappings parsed correctly' );
};

Loading…
Cancel
Save