From 1404654d65baccacb2928f59171ec6f41d9b653b Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Fri, 7 Apr 2017 23:09:05 -0400 Subject: [PATCH] Bug 18318: Unicode support for Elasticsearch Test plan: 1) You must install the ICU analysis plugin for Elasticsearch: https://www.elastic.co/guide/en/elasticsearch/plugins/current/analysis-icu.html 2) Once installed, apply this patch. 3) Reindex your data, deleting the existing indexes: perl /home/vagrant/kohaclone/misc/search_tools/rebuild_elastic_search.pl -d 4) Find (or add) some titles with accented characters. 5) Verify that a search for the exact character or the unaccented version works. Signed-off-by: Tomas Cohen Arazi Signed-off-by: Julian Maurice Signed-off-by: Jonathan Druart --- Koha/SearchEngine/Elasticsearch.pm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 43bb0a83a8..de4b18ce4d 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -141,12 +141,12 @@ sub get_elasticsearch_settings { analysis => { analyzer => { analyser_phrase => { - tokenizer => 'keyword', - filter => ['lowercase'], + tokenizer => 'icu_tokenizer', + filter => ['icu_folding'], }, analyser_standard => { - tokenizer => 'standard', - filter => ['lowercase'], + tokenizer => 'icu_tokenizer', + filter => ['icu_folding'], }, }, } @@ -170,6 +170,7 @@ sub get_elasticsearch_mappings { # TODO cache in the object? my $mappings = { data => { + _all => {type => "string", analyzer => "analyser_standard"}, properties => { record => { store => "true", -- 2.39.5