From d1077b5ae6aa5c375ef91e6150a7d39b60513325 Mon Sep 17 00:00:00 2001 From: Olli-Antti Kivilahti Date: Mon, 9 Jan 2017 18:17:48 +0200 Subject: [PATCH] Bug 17255 - Upgrade Elastic Search code to work with version 2.4+ - rebased wip -Changed deprecated facets to aggregations -Fixed boolean datatypes not allowing analyzers to be specified -Fixed deprecated '_id' to 'es_id'. Now the ES-index has the correct id==biblionumber ZE TEST PLAN 1. Reset Zebra index since facets are hard coded to dynamic search_marc_mappings. 2. perl misc/search_tools/rebuild_elastic_search.pl 3. Fetch all indexed records and the facet for subject__facet curl -XGET localhost:9200/koha_biblios/data/_search?pretty -d '{ "aggregations": { "my_agg": { "terms": { "field": "subject__facet" } } } }' Signed-off-by: Nick Clemens Signed-off-by: Tomas Cohen Arazi Signed-off-by: Kyle M Hall --- Koha/SearchEngine/Elasticsearch.pm | 91 +++++++++++++------ Koha/SearchEngine/Elasticsearch/Indexer.pm | 1 + .../Elasticsearch/QueryBuilder.pm | 4 +- Koha/SearchEngine/Elasticsearch/Search.pm | 12 +-- 4 files changed, 71 insertions(+), 37 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 35a6540dc2..f8b904cce7 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -115,6 +115,8 @@ sub get_elasticsearch_params { if ( !$es->{index_name} ); # Append the name of this particular index to our namespace $es->{index_name} .= '_' . $self->index; + + $es->{key_prefix} = 'es_'; return $es; } @@ -145,7 +147,11 @@ sub get_elasticsearch_settings { analyser_standard => { tokenizer => 'standard', filter => ['lowercase'], - } + }, + default => { + tokenizer => 'keyword', + filter => ['lowercase'], + }, }, } } @@ -174,11 +180,6 @@ sub get_elasticsearch_mappings { include_in_all => JSON::false, type => "string", }, - '_all.phrase' => { - search_analyzer => "analyser_phrase", - index_analyzer => "analyser_phrase", - type => "string", - }, } } }; @@ -188,6 +189,7 @@ sub get_elasticsearch_mappings { sub { my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; return if $marc_type ne $marcflavour; + # TODO if this gets any sort of complexity to it, it should # be broken out into its own function. @@ -197,25 +199,14 @@ sub get_elasticsearch_mappings { $type eq 'boolean' ? 'boolean' : 'string'; - $mappings->{data}{properties}{$name} = { - search_analyzer => "analyser_standard", - index_analyzer => "analyser_standard", - type => $es_type, - fields => { - phrase => { - search_analyzer => "analyser_phrase", - index_analyzer => "analyser_phrase", - type => "string", - copy_to => "_all.phrase", - }, - raw => { - "type" => "string", - "index" => "not_analyzed", - } - }, - }; - $mappings->{data}{properties}{$name}{null_value} = 0 - if $type eq 'boolean'; + + if ($es_type eq 'boolean') { + $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type ); + return; #Boolean cannot have facets nor sorting nor suggestions + } else { + $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type ); + } + if ($facet) { $mappings->{data}{properties}{ $name . '__facet' } = { type => "string", @@ -225,7 +216,7 @@ sub get_elasticsearch_mappings { if ($suggestible) { $mappings->{data}{properties}{ $name . '__suggestion' } = { type => 'completion', - index_analyzer => 'simple', + analyzer => 'simple', search_analyzer => 'simple', }; } @@ -234,13 +225,13 @@ sub get_elasticsearch_mappings { if (defined $sort) { $mappings->{data}{properties}{ $name . '__sort' } = { search_analyzer => "analyser_phrase", - index_analyzer => "analyser_phrase", + analyzer => "analyser_phrase", type => "string", include_in_all => JSON::false, fields => { phrase => { search_analyzer => "analyser_phrase", - index_analyzer => "analyser_phrase", + analyzer => "analyser_phrase", type => "string", }, }, @@ -253,6 +244,44 @@ sub get_elasticsearch_mappings { return $mappings; } +=head2 _elasticsearch_mapping_for_* + +Get the ES mappings for the given data type or a special mapping case + +Receives the same parameters from the $self->_foreach_mapping() dispatcher + +=cut + +sub _elasticsearch_mapping_for_boolean { + my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; + + return { + type => $type, + null_value => 0, + }; +} + +sub _elasticsearch_mapping_for_default { + my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; + + return { + search_analyzer => "analyser_standard", + analyzer => "analyser_standard", + type => $type, + fields => { + phrase => { + search_analyzer => "analyser_phrase", + analyzer => "analyser_phrase", + type => "string", + }, + raw => { + type => "string", + index => "not_analyzed", + } + }, + }; +} + sub reset_elasticsearch_mappings { my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml'; my $indexes = LoadFile( $mappings_yaml ); @@ -294,6 +323,7 @@ sub get_fixer_rules { my $marcflavour = lc C4::Context->preference('marcflavour'); my @rules; + $self->_foreach_mapping( sub { my ( $name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field ) = @_; @@ -311,7 +341,8 @@ sub get_fixer_rules { } if ($suggestible) { push @rules, -"marc_map('$marc_field','${name}__suggestion.input.\$append', $options)"; + #"marc_map('$marc_field','${name}__suggestion.input.\$append', $options)"; #must not have nested data structures in .input + "marc_map('$marc_field','${name}__suggestion.input.\$append')"; } if ( $type eq 'boolean' ) { @@ -334,6 +365,8 @@ sub get_fixer_rules { } } ); + + push @rules, "move_field(_id,es_id)"; #Also you must set the Catmandu::Store::ElasticSearch->new(key_prefix: 'es_'); return \@rules; } diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm index b35378f48b..172580a8c7 100644 --- a/Koha/SearchEngine/Elasticsearch/Indexer.pm +++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm @@ -77,6 +77,7 @@ sub update_index { ) ); } + my $array_ref = $from->to_array; $self->store->bag->add_many($from); $self->store->bag->commit; return 1; diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 53de2836ae..d0aa916c23 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -111,7 +111,7 @@ sub build_query { # See _convert_facets in Search.pm for how these get turned into # things that Koha can use. - $res->{facets} = { + $res->{aggregations} = { author => { terms => { field => "author__facet" } }, subject => { terms => { field => "subject__facet" } }, itype => { terms => { field => "itype__facet" } }, @@ -120,7 +120,7 @@ sub build_query { se => { terms => { field => "se__facet" } }, }; if ( my $ef = $options{expanded_facet} ) { - $res->{facets}{$ef}{terms}{size} = C4::Context->preference('FacetMaxCount'); + $res->{aggregations}{$ef}{terms}{size} = C4::Context->preference('FacetMaxCount'); }; return $res; } diff --git a/Koha/SearchEngine/Elasticsearch/Search.pm b/Koha/SearchEngine/Elasticsearch/Search.pm index d1ccd52295..6e3ca3b6a4 100644 --- a/Koha/SearchEngine/Elasticsearch/Search.pm +++ b/Koha/SearchEngine/Elasticsearch/Search.pm @@ -167,7 +167,7 @@ sub search_compat { my %result; $result{biblioserver}{hits} = $results->total; $result{biblioserver}{RECORDS} = \@records; - return (undef, \%result, $self->_convert_facets($results->{facets}, $expanded_facet)); + return (undef, \%result, $self->_convert_facets($results->{aggregations}, $expanded_facet)); } =head2 search_auth_compat @@ -430,15 +430,15 @@ sub _convert_facets { type_id => $type . '_id', expand => $type, expandable => ( $type ne $exp_facet ) - && ( @{ $data->{terms} } > $limit ), + && ( @{ $data->{buckets} } > $limit ), "type_label_$type_to_label{$type}{label}" => 1, type_link_value => $type, order => $type_to_label{$type}{order}, }; - $limit = @{ $data->{terms} } if ( $limit > @{ $data->{terms} } ); - foreach my $term ( @{ $data->{terms} }[ 0 .. $limit - 1 ] ) { - my $t = $term->{term}; - my $c = $term->{count}; + $limit = @{ $data->{buckets} } if ( $limit > @{ $data->{buckets} } ); + foreach my $term ( @{ $data->{buckets} }[ 0 .. $limit - 1 ] ) { + my $t = $term->{key}; + my $c = $term->{doc_count}; my $label; if ( exists( $special{$type} ) ) { $label = $special{$type}->{$t} // $t; -- 2.39.5