Pārlūkot izejas kodu

Revert "Bug 20073: Move Elasticsearch configs to yaml files and improve the default settings."

This reverts commit f489d2034b.

This commit breaks the install process when using debian packages.
Reverting as we are very close to the 18.05.00 release

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
18.05.x
Jonathan Druart pirms 3 gadiem
vecāks
revīzija
cff62796ea
  1. 201
      Koha/SearchEngine/Elasticsearch.pm
  2. 7
      admin/searchengine/elasticsearch/mappings.yaml
  3. 11
      debian/templates/koha-conf-site.xml.in
  4. 11
      etc/koha-conf.xml
  5. 61
      etc/searchengine/elasticsearch/field_config.yaml
  6. 34
      etc/searchengine/elasticsearch/index_config.yaml
  7. 2
      installer/data/mysql/kohastructure.sql
  8. 10
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt
  9. 26
      t/Koha/SearchEngine/Elasticsearch.t

201
Koha/SearchEngine/Elasticsearch.pm

@ -34,6 +34,8 @@ use Search::Elasticsearch;
use Try::Tiny;
use YAML::Syck;
use Data::Dumper; # TODO remove
__PACKAGE__->mk_ro_accessors(qw( index ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
@ -135,14 +137,24 @@ A hashref containing the settings is returned.
sub get_elasticsearch_settings {
my ($self) = @_;
# Use state to speed up repeated calls
state $settings = undef;
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_index_config');
$config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/index_config.yaml';
$settings = LoadFile( $config_file );
}
# Ultimately this should come from a file or something, and not be
# hardcoded.
my $settings = {
index => {
analysis => {
analyzer => {
analyser_phrase => {
tokenizer => 'icu_tokenizer',
filter => ['icu_folding'],
},
analyser_standard => {
tokenizer => 'icu_tokenizer',
filter => ['icu_folding'],
},
},
}
}
};
return $settings;
}
@ -158,97 +170,116 @@ created.
sub get_elasticsearch_mappings {
my ($self) = @_;
# Use state to speed up repeated calls
state %all_mappings;
state %sort_fields;
if (!defined $all_mappings{$self->index}) {
$sort_fields{$self->index} = {};
my $mappings = {
data => _get_elasticsearch_mapping('general', '')
};
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
# TODO be aware of date formats, but this requires pre-parsing
# as ES will simply reject anything with an invalid date.
my $es_type = 'text';
if ($type eq 'boolean') {
$es_type = 'boolean';
} elsif ($type eq 'number' || $type eq 'sum') {
$es_type = 'integer';
} elsif ($type eq 'isbn' || $type eq 'stdno') {
$es_type = 'stdno';
}
$mappings->{data}{properties}{$name} = _get_elasticsearch_mapping('search', $es_type);
if ($facet) {
$mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_mapping('facet', $es_type);
}
if ($suggestible) {
$mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_mapping('suggestible', $es_type);
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if (!defined $sort || $sort) {
$mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_mapping('sort', $es_type);
$sort_fields{$self->index}{$name} = 1;
}
# TODO cache in the object?
my $mappings = {
data => {
_all => {type => "string", analyzer => "analyser_standard"},
properties => {
record => {
store => "true",
include_in_all => JSON::false,
type => "text",
},
}
}
};
my %sort_fields;
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
# TODO be aware of date formats, but this requires pre-parsing
# as ES will simply reject anything with an invalid date.
my $es_type =
$type eq 'boolean'
? 'boolean'
: 'text';
if ($es_type eq 'boolean') {
$mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
return; #Boolean cannot have facets nor sorting nor suggestions
} else {
$mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
}
);
$all_mappings{$self->index} = $mappings;
}
$self->sort_fields(\%{$sort_fields{$self->index}});
return $all_mappings{$self->index};
if ($facet) {
$mappings->{data}{properties}{ $name . '__facet' } = {
type => "keyword",
};
}
if ($suggestible) {
$mappings->{data}{properties}{ $name . '__suggestion' } = {
type => 'completion',
analyzer => 'simple',
search_analyzer => 'simple',
};
}
# Sort is a bit special as it can be true, false, undef.
# We care about "true" or "undef",
# "undef" means to do the default thing, which is make it sortable.
if ($sort || !defined $sort) {
$mappings->{data}{properties}{ $name . '__sort' } = {
search_analyzer => "analyser_phrase",
analyzer => "analyser_phrase",
type => "text",
include_in_all => JSON::false,
fields => {
phrase => {
type => "keyword",
},
},
};
$sort_fields{$name} = 1;
}
}
);
$self->sort_fields(\%sort_fields);
return $mappings;
}
=head2 _get_elasticsearch_mapping
=head2 _elasticsearch_mapping_for_*
Get the ES mappings for the given purpose and data type
Get the ES mappings for the given data type or a special mapping case
$mapping = _get_elasticsearch_mapping('search', 'text');
Receives the same parameters from the $self->_foreach_mapping() dispatcher
=cut
sub _get_elasticsearch_mapping {
my ( $purpose, $type ) = @_;
sub _elasticsearch_mapping_for_boolean {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
# Use state to speed up repeated calls
state $settings = undef;
if (!defined $settings) {
my $config_file = C4::Context->config('elasticsearch_field_config');
$config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/field_config.yaml';
$settings = LoadFile( $config_file );
}
return {
type => $type,
null_value => 0,
};
}
if (!defined $settings->{$purpose}) {
die "Field purpose $purpose not defined in field config";
}
if ($type eq '') {
return $settings->{$purpose};
}
if (defined $settings->{$purpose}{$type}) {
return $settings->{$purpose}{$type};
}
if (defined $settings->{$purpose}{'default'}) {
return $settings->{$purpose}{'default'};
}
return undef;
sub _elasticsearch_mapping_for_default {
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return {
search_analyzer => "analyser_standard",
analyzer => "analyser_standard",
type => $type,
fields => {
phrase => {
search_analyzer => "analyser_phrase",
analyzer => "analyser_phrase",
type => "text",
},
raw => {
type => "keyword",
}
},
};
}
sub reset_elasticsearch_mappings {
my ( $reset_fields ) = @_;
my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings');
$mappings_yaml ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/mappings.yaml';
my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
my $indexes = LoadFile( $mappings_yaml );
while ( my ( $index_name, $fields ) = each %$indexes ) {

7
etc/searchengine/elasticsearch/mappings.yaml → admin/searchengine/elasticsearch/mappings.yaml

@ -1,5 +1,4 @@
---
# Basic mappings from MARC fields to Elasticsearch fields.
authorities:
Corporate-name-see-also-from:
label: Corporate-name-see-also-from
@ -2163,7 +2162,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: 'stdno'
type: ''
isbn:
label: isbn
mappings:
@ -2182,7 +2181,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: 'isbn'
type: ''
issn:
label: issn
mappings:
@ -2201,7 +2200,7 @@ biblios:
marc_type: unimarc
sort: ~
suggestible: ''
type: 'stdno'
type: ''
issues:
label: issues
mappings:

11
debian/templates/koha-conf-site.xml.in

@ -328,21 +328,10 @@ __END_SRU_PUBLICSERVER__
<plack_max_requests>50</plack_max_requests>
<plack_workers>2</plack_workers>
<!-- Elasticsearch Configuration -->
<elasticsearch>
<server>localhost:9200</server>
<index_name>koha___KOHASITE__</index_name>
</elasticsearch>
<!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
<!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
<!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
Note that any changes made to the mappings file only take effect if you reset the mappings in
by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
Resetting mappings will override any changes made in the Search engine configuration UI.
-->
<!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
<interlibrary_loans>
<!-- Path to where Illbackends are located on the system

11
etc/koha-conf.xml

@ -159,21 +159,10 @@ __PAZPAR2_TOGGLE_XML_POST__
<plack_max_requests>50</plack_max_requests>
<plack_workers>2</plack_workers>
<!-- Elasticsearch Configuration -->
<elasticsearch>
<server>localhost:9200</server>
<index_name>koha___DB_NAME__</index_name>
</elasticsearch>
<!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
<!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
<!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
<!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
Note that any changes made to the mappings file only take effect if you reset the mappings in
by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
Resetting mappings will override any changes made in the Search engine configuration UI.
-->
<!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
<interlibrary_loans>
<!-- Path to where Illbackends are located on the system

61
etc/searchengine/elasticsearch/field_config.yaml

@ -1,61 +0,0 @@
---
# General field configuration
general:
_all:
type: string
analyzer: analyser_standard
properties:
record:
store: true
type: text
# Search fields
search:
boolean:
type: boolean
null_value: false
integer:
type: integer
null_value: 0
stdno:
type: text
analyzer: analyser_stdno
search_analyzer: analyser_stdno
fields:
phrase:
type: text
analyzer: analyser_stdno
search_analyzer: analyser_stdno
raw:
type: keyword
copy_to: _all
default:
type: text
analyzer: analyser_standard
search_analyzer: analyser_standard
fields:
phrase:
type: text
analyzer: analyser_phrase
search_analyzer: analyser_phrase
raw:
type: keyword
copy_to: _all
# Facets
facet:
default:
type: keyword
# Suggestible
suggestible:
default:
type: completion
analyzer: simple
search_analyzer: simple
# Sort
sort:
default:
type: text
analyzer: analyser_phrase
search_analyzer: analyser_phrase
fields:
phrase:
type: keyword

34
etc/searchengine/elasticsearch/index_config.yaml

@ -1,34 +0,0 @@
---
# Index configuration that defines how different analyzers work.
index:
analysis:
analyzer:
# Phrase analyzer is used for phrases (phrase match, sorting)
analyser_phrase:
tokenizer: keyword
filter:
- icu_folding
char_filter:
- punctuation
analyser_standard:
tokenizer: icu_tokenizer
filter:
- icu_folding
analyser_stdno:
tokenizer: whitespace
filter:
- icu_folding
char_filter:
- punctuation
normalizer:
normalizer_keyword:
type: custom
filter:
- icu_folding
char_filter:
# The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer.
punctuation:
type: pattern_replace
# The pattern contains all ASCII punctuation characters.
pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])'
replacement: ''

2
installer/data/mysql/kohastructure.sql

@ -1459,7 +1459,7 @@ CREATE TABLE `search_field` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine',
`label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display',
`type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum', 'isbn', 'stdno') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
`type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
PRIMARY KEY (`id`),
UNIQUE KEY (`name` (191))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

10
koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt

@ -164,16 +164,6 @@ a.add, a.delete {
[% ELSE %]
<option value="sum">Sum</option>
[% END %]
[% IF search_field.type == "isbn" %]
<option value="isbn" selected="selected">ISBN</option>
[% ELSE %]
<option value="isbn">ISBN</option>
[% END %]
[% IF search_field.type == "stdno" %]
<option value="stdno" selected="selected">Std. Number</option>
[% ELSE %]
<option value="stdno">Std. Number</option>
[% END %]
</select>
</td>
</tr>

26
t/Koha/SearchEngine/Elasticsearch.t

@ -17,7 +17,7 @@
use Modern::Perl;
use Test::More tests => 3;
use Test::More tests => 1;
use Test::Exception;
use t::lib::Mocks;
@ -84,27 +84,3 @@ subtest '_read_configuration() tests' => sub {
is( $configuration->{index_name}, 'index', 'Index configuration parsed correctly' );
is_deeply( $configuration->{nodes}, \@servers , 'Server configuration parsed correctly' );
};
subtest 'get_elasticsearch_settings() tests' => sub {
plan tests => 1;
my $settings;
# test reading index settings
my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
$settings = $es->get_elasticsearch_settings();
is( $settings->{index}{analysis}{analyzer}{analyser_phrase}{tokenizer}, 'keyword', 'Index settings parsed correctly' );
};
subtest 'get_elasticsearch_mappings() tests' => sub {
plan tests => 1;
my $mappings;
# test reading mappings
my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
$mappings = $es->get_elasticsearch_mappings();
is( $mappings->{data}{_all}{type}, 'string', 'Field mappings parsed correctly' );
};
Notiek ielāde…
Atcelt
Saglabāt