From cff62796ea9fc227c24fec8f8f3f153a715796ef Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Mon, 21 May 2018 11:02:04 -0300 Subject: [PATCH] Revert "Bug 20073: Move Elasticsearch configs to yaml files and improve the default settings." This reverts commit f489d2034bbc3bff67230423a7986d5ea34060ec. This commit breaks the install process when using debian packages. Reverting as we are very close to the 18.05.00 release Signed-off-by: Jonathan Druart --- Koha/SearchEngine/Elasticsearch.pm | 201 ++++++++++-------- .../searchengine/elasticsearch/mappings.yaml | 7 +- debian/templates/koha-conf-site.xml.in | 11 - etc/koha-conf.xml | 11 - .../elasticsearch/field_config.yaml | 61 ------ .../elasticsearch/index_config.yaml | 34 --- installer/data/mysql/kohastructure.sql | 2 +- .../searchengine/elasticsearch/mappings.tt | 10 - t/Koha/SearchEngine/Elasticsearch.t | 26 +-- 9 files changed, 121 insertions(+), 242 deletions(-) rename {etc => admin}/searchengine/elasticsearch/mappings.yaml (99%) delete mode 100644 etc/searchengine/elasticsearch/field_config.yaml delete mode 100644 etc/searchengine/elasticsearch/index_config.yaml diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index e90af21711..fe2e0ea17c 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -34,6 +34,8 @@ use Search::Elasticsearch; use Try::Tiny; use YAML::Syck; +use Data::Dumper; # TODO remove + __PACKAGE__->mk_ro_accessors(qw( index )); __PACKAGE__->mk_accessors(qw( sort_fields )); @@ -135,14 +137,24 @@ A hashref containing the settings is returned. sub get_elasticsearch_settings { my ($self) = @_; - # Use state to speed up repeated calls - state $settings = undef; - if (!defined $settings) { - my $config_file = C4::Context->config('elasticsearch_index_config'); - $config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/index_config.yaml'; - $settings = LoadFile( $config_file ); - } - + # Ultimately this should come from a file or something, and not be + # hardcoded. + my $settings = { + index => { + analysis => { + analyzer => { + analyser_phrase => { + tokenizer => 'icu_tokenizer', + filter => ['icu_folding'], + }, + analyser_standard => { + tokenizer => 'icu_tokenizer', + filter => ['icu_folding'], + }, + }, + } + } + }; return $settings; } @@ -158,97 +170,116 @@ created. sub get_elasticsearch_mappings { my ($self) = @_; - # Use state to speed up repeated calls - state %all_mappings; - state %sort_fields; - - if (!defined $all_mappings{$self->index}) { - $sort_fields{$self->index} = {}; - my $mappings = { - data => _get_elasticsearch_mapping('general', '') - }; - my $marcflavour = lc C4::Context->preference('marcflavour'); - $self->_foreach_mapping( - sub { - my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; - return if $marc_type ne $marcflavour; - # TODO if this gets any sort of complexity to it, it should - # be broken out into its own function. - - # TODO be aware of date formats, but this requires pre-parsing - # as ES will simply reject anything with an invalid date. - my $es_type = 'text'; - if ($type eq 'boolean') { - $es_type = 'boolean'; - } elsif ($type eq 'number' || $type eq 'sum') { - $es_type = 'integer'; - } elsif ($type eq 'isbn' || $type eq 'stdno') { - $es_type = 'stdno'; - } - - $mappings->{data}{properties}{$name} = _get_elasticsearch_mapping('search', $es_type); - - if ($facet) { - $mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_mapping('facet', $es_type); - } - if ($suggestible) { - $mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_mapping('suggestible', $es_type); - } - # Sort is a bit special as it can be true, false, undef. - # We care about "true" or "undef", - # "undef" means to do the default thing, which is make it sortable. - if (!defined $sort || $sort) { - $mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_mapping('sort', $es_type); - $sort_fields{$self->index}{$name} = 1; - } + # TODO cache in the object? + my $mappings = { + data => { + _all => {type => "string", analyzer => "analyser_standard"}, + properties => { + record => { + store => "true", + include_in_all => JSON::false, + type => "text", + }, + } + } + }; + my %sort_fields; + my $marcflavour = lc C4::Context->preference('marcflavour'); + $self->_foreach_mapping( + sub { + my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; + return if $marc_type ne $marcflavour; + # TODO if this gets any sort of complexity to it, it should + # be broken out into its own function. + + # TODO be aware of date formats, but this requires pre-parsing + # as ES will simply reject anything with an invalid date. + my $es_type = + $type eq 'boolean' + ? 'boolean' + : 'text'; + + if ($es_type eq 'boolean') { + $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type ); + return; #Boolean cannot have facets nor sorting nor suggestions + } else { + $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type ); } - ); - $all_mappings{$self->index} = $mappings; - } - $self->sort_fields(\%{$sort_fields{$self->index}}); - return $all_mappings{$self->index}; + if ($facet) { + $mappings->{data}{properties}{ $name . '__facet' } = { + type => "keyword", + }; + } + if ($suggestible) { + $mappings->{data}{properties}{ $name . '__suggestion' } = { + type => 'completion', + analyzer => 'simple', + search_analyzer => 'simple', + }; + } + # Sort is a bit special as it can be true, false, undef. + # We care about "true" or "undef", + # "undef" means to do the default thing, which is make it sortable. + if ($sort || !defined $sort) { + $mappings->{data}{properties}{ $name . '__sort' } = { + search_analyzer => "analyser_phrase", + analyzer => "analyser_phrase", + type => "text", + include_in_all => JSON::false, + fields => { + phrase => { + type => "keyword", + }, + }, + }; + $sort_fields{$name} = 1; + } + } + ); + $self->sort_fields(\%sort_fields); + return $mappings; } -=head2 _get_elasticsearch_mapping +=head2 _elasticsearch_mapping_for_* -Get the ES mappings for the given purpose and data type +Get the ES mappings for the given data type or a special mapping case -$mapping = _get_elasticsearch_mapping('search', 'text'); +Receives the same parameters from the $self->_foreach_mapping() dispatcher =cut -sub _get_elasticsearch_mapping { - - my ( $purpose, $type ) = @_; +sub _elasticsearch_mapping_for_boolean { + my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; - # Use state to speed up repeated calls - state $settings = undef; - if (!defined $settings) { - my $config_file = C4::Context->config('elasticsearch_field_config'); - $config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/field_config.yaml'; - $settings = LoadFile( $config_file ); - } + return { + type => $type, + null_value => 0, + }; +} - if (!defined $settings->{$purpose}) { - die "Field purpose $purpose not defined in field config"; - } - if ($type eq '') { - return $settings->{$purpose}; - } - if (defined $settings->{$purpose}{$type}) { - return $settings->{$purpose}{$type}; - } - if (defined $settings->{$purpose}{'default'}) { - return $settings->{$purpose}{'default'}; - } - return undef; +sub _elasticsearch_mapping_for_default { + my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; + + return { + search_analyzer => "analyser_standard", + analyzer => "analyser_standard", + type => $type, + fields => { + phrase => { + search_analyzer => "analyser_phrase", + analyzer => "analyser_phrase", + type => "text", + }, + raw => { + type => "keyword", + } + }, + }; } sub reset_elasticsearch_mappings { - my ( $reset_fields ) = @_; - my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings'); - $mappings_yaml ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/mappings.yaml'; + my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml'; my $indexes = LoadFile( $mappings_yaml ); while ( my ( $index_name, $fields ) = each %$indexes ) { diff --git a/etc/searchengine/elasticsearch/mappings.yaml b/admin/searchengine/elasticsearch/mappings.yaml similarity index 99% rename from etc/searchengine/elasticsearch/mappings.yaml rename to admin/searchengine/elasticsearch/mappings.yaml index 18e63fa5e8..acfc24ce41 100644 --- a/etc/searchengine/elasticsearch/mappings.yaml +++ b/admin/searchengine/elasticsearch/mappings.yaml @@ -1,5 +1,4 @@ --- -# Basic mappings from MARC fields to Elasticsearch fields. authorities: Corporate-name-see-also-from: label: Corporate-name-see-also-from @@ -2163,7 +2162,7 @@ biblios: marc_type: unimarc sort: ~ suggestible: '' - type: 'stdno' + type: '' isbn: label: isbn mappings: @@ -2182,7 +2181,7 @@ biblios: marc_type: unimarc sort: ~ suggestible: '' - type: 'isbn' + type: '' issn: label: issn mappings: @@ -2201,7 +2200,7 @@ biblios: marc_type: unimarc sort: ~ suggestible: '' - type: 'stdno' + type: '' issues: label: issues mappings: diff --git a/debian/templates/koha-conf-site.xml.in b/debian/templates/koha-conf-site.xml.in index d2b47fcb30..9b11c714b4 100644 --- a/debian/templates/koha-conf-site.xml.in +++ b/debian/templates/koha-conf-site.xml.in @@ -328,21 +328,10 @@ __END_SRU_PUBLICSERVER__ 50 2 - localhost:9200 koha___KOHASITE__ - - - - - - localhost:9200 koha___DB_NAME__ - - - - - -