Nick Clemens
f983c1e3f7
To test: 1 - Do some authority searches in Zebra 2 - Switch to ES and repeat; results will vary and some may fail 3 - Apply patch and dependencies 4 - Reindex ES 5 - Repeat searches; they should succeed and results should be similar to Zebra 6 - Slight differences are okay, but results should (mostly) meet expectations A few notes: We add a 'normalizer' to ensure we get a single token from the heading indexes; this makes 'starts with' work as expected. We switch to 'AND' for fields searched from the cataloging editor - this matches Zebra results. We force the '__sort' fields for sorting - if sorting looks wrong, try reducing the heading field to a single subfield - this will need to be addressed in a future bug (multiple subfields create an array, and ES sorts those randomly). Signed-off-by: Nicolas Legrand <nicolas.legrand@bulac.fr> Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de> Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
37 lines
1.1 KiB
YAML
37 lines
1.1 KiB
YAML
---
# Index configuration that defines how different analyzers work.
index:
  analysis:
    analyzer:
      # Phrase analyzer is used for phrases (phrase match, sorting).
      # The keyword tokenizer keeps the whole input as a single token;
      # punctuation is stripped before tokenizing and the token is then
      # folded by icu_folding.
      analyser_phrase:
        tokenizer: keyword
        filter:
          - icu_folding
        char_filter:
          - punctuation
      # General-purpose analyzer: ICU tokenization followed by ICU folding.
      # No punctuation char_filter is applied here (see the note on the
      # punctuation filter below).
      analyser_standard:
        tokenizer: icu_tokenizer
        filter:
          - icu_folding
      # Splits on whitespace only, after stripping punctuation.
      # NOTE(review): presumably intended for standard-number style
      # identifiers ("stdno") - confirm against the field mappings.
      analyser_stdno:
        tokenizer: whitespace
        filter:
          - icu_folding
        char_filter:
          - punctuation
    normalizer:
      # Custom normalizer applying only icu_folding.
      normalizer_keyword:
        type: custom
        filter:
          - icu_folding
      # Custom normalizer applying only the icu_normalizer char filter.
      # NOTE(review): char_filter is written as a scalar here while every
      # other filter/char_filter in this file is a list - confirm this is
      # intentional before normalizing the style.
      my_normalizer:
        type: custom
        char_filter: icu_normalizer
    char_filter:
      # The punctuation filter is used to remove any punctuation chars in
      # fields that don't use icu_tokenizer.
      punctuation:
        type: pattern_replace
        # The pattern matches ASCII control characters (\x00-\x1F) and the
        # ASCII punctuation ranges, plus a set of code points in the
        # \x7F-\xBF range and \xD7 / \xF7. The commas inside the character
        # class are literal; ',' (\x2C) is already covered by \x21-\x2F.
        # Every match is replaced by the empty string, i.e. removed.
        pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])'
        replacement: ''