Bug 17661: Ending punctuation causes duplicate facets
[koha.git] / admin / searchengine / elasticsearch / index_config.yaml
1 ---
2 # Index configuration that defines how different analyzers work.
3 index:
4   analysis:
5     analyzer:
6       # Phrase analyzer is used for phrases (exact phrase match): keyword tokenizer, then icu_folding, with the punctuation char filter applied to the text.
7       analyzer_phrase:
8         tokenizer: keyword  # presumably keeps the whole value as a single token (see ES keyword tokenizer docs) - confirm
9         filter:
10           - icu_folding  # NOTE(review): icu_* components presumably need the ICU analysis plugin - confirm
11         char_filter:
12           - punctuation  # the pattern_replace char filter declared under char_filter in this file
13       analyzer_standard:  # analyzer built from icu_tokenizer plus the icu_folding token filter
14         tokenizer: icu_tokenizer  # no punctuation char_filter here, unlike the two sibling analyzers
15         filter:
16           - icu_folding
17       analyzer_stdno:  # NOTE(review): name suggests "standard number" fields (identifiers) - confirm against the field mappings
18         tokenizer: whitespace  # same filter and char_filter as analyzer_phrase; only the tokenizer differs
19         filter:
20           - icu_folding
21         char_filter:
22           - punctuation  # the pattern_replace char filter declared under char_filter in this file
23     normalizer:
24       icu_folding_normalizer:  # custom normalizer whose only step is the icu_folding token filter
25         type: custom
26         filter:
27           - icu_folding
28       nfkc_cf_normalizer:  # custom normalizer whose only step is the icu_normalizer char filter
29         type: custom
30         char_filter: icu_normalizer  # NOTE(review): name implies the nfkc_cf mode, presumably the filter's default - confirm
31       facet_normalizer:  # NOTE(review): no explicit `type: custom`, unlike the two sibling normalizers - confirm the default applies
32         char_filter: facet  # the trailing-punctuation pattern_replace filter declared under char_filter in this file
33     char_filter:
34       # The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer (here: analyzer_phrase and analyzer_stdno).
35       punctuation:
36         type: pattern_replace
37         # One character class: control chars \x00-\x1F, the ASCII punctuation/symbol ranges (\x21-\x2F, \x3A-\x40, \x5B-\x60, \x7B-\x7E), and selected code points in \x7F-\xBF plus \xD7 and \xF7. Space (\x20), ASCII digits and ASCII letters are not in the class; the commas between ranges are literal commas (already covered by \x21-\x2F).
38         pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])'
39         replacement: ''  # every matched character is replaced with the empty string
40       facet:  # strips trailing whitespace and a trailing run of . - , ; from the end of the value; referenced by facet_normalizer
41         type: pattern_replace
42         pattern: '\s*(?<![A-Z])[.\-,;]*\s*$'  # (?<![A-Z]) stops the punctuation run from starting right after an ASCII capital, so the period in e.g. "Smith, J." is kept; NOTE(review): non-ASCII capitals are not exempt - confirm that is intended
43         replacement: ''  # the matched tail is replaced with the empty string
44 index.mapping.total_fields.limit: 10000  # top-level flat setting for the index's total mapped-field limit; NOTE(review): presumably raised above the Elasticsearch default - confirm for the deployed version