diff --git a/C4/Search.pm b/C4/Search.pm index 56b7991a04..a9a5c28725 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -680,6 +680,7 @@ sub _get_facets_data_from_record { next if $field->indicator(1) eq 'z'; my $data = $field->as_string( $subfield_letters, $facet->{ sep } ); + $data =~ s/\s*(?textContent; + $facet_value =~ s/\s*(?{ $facet_value } = $term->getAttribute( 'occur' ); + $facets->{ $facet_value } = ( defined $facets->{$facet_value} ) ? $facets->{ $facet_value } + $term->getAttribute( 'occur' ) : $term->getAttribute( 'occur' ); } return $facets; diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml index 82793ff421..c98b26dcfe 100644 --- a/admin/searchengine/elasticsearch/field_config.yaml +++ b/admin/searchengine/elasticsearch/field_config.yaml @@ -55,6 +55,7 @@ search: facet: default: type: keyword + normalizer: facet_normalizer # Suggestible suggestible: default: diff --git a/admin/searchengine/elasticsearch/index_config.yaml b/admin/searchengine/elasticsearch/index_config.yaml index 7a8d9052b4..dfae04dd97 100644 --- a/admin/searchengine/elasticsearch/index_config.yaml +++ b/admin/searchengine/elasticsearch/index_config.yaml @@ -28,6 +28,8 @@ index: nfkc_cf_normalizer: type: custom char_filter: icu_normalizer + facet_normalizer: + char_filter: facet char_filter: # The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer. punctuation: @@ -35,4 +37,8 @@ index: # The pattern contains all ASCII punctuation characters. pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])' replacement: '' + facet: + type: pattern_replace + pattern: '\s*(?