From f297fb819c38553899f2cde10b108ca8fa712dce Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Thu, 27 Aug 2020 16:27:09 +0000 Subject: [PATCH] Bug 17661: (follow-up) Update regex to support Unicode characters MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Rather than limiting initials to [A-Z] we should test for a broad range of uppercase letters. The ES/Zebra changes are slightly different because of Perl vs Java regex conventions. Perl may support either, but I found 'Uppercase' to be a bit more explicit. More info here: https://perldoc.perl.org/perlunicode.html To test: Same plan as before but use Ж. as the ending initial. Confirm the period is preserved and other punctuation removed. Signed-off-by: Katrin Fischer Signed-off-by: Jonathan Druart (cherry picked from commit 224ac84aeca3b8dba87366925d83b01e0f5c1110) Signed-off-by: Lucas Gass --- C4/Search.pm | 4 ++-- admin/searchengine/elasticsearch/index_config.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/C4/Search.pm b/C4/Search.pm index e2f2d0936b..d39bbe350f 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -680,7 +680,7 @@ sub _get_facets_data_from_record { next if $field->indicator(1) eq 'z'; my $data = $field->as_string( $subfield_letters, $facet->{ sep } ); - $data =~ s/\s*(?textContent; - $facet_value =~ s/\s*(?{ $facet_value } = ( defined $facets->{$facet_value} ) ? $facets->{ $facet_value } + $term->getAttribute( 'occur' ) : $term->getAttribute( 'occur' ); } diff --git a/admin/searchengine/elasticsearch/index_config.yaml b/admin/searchengine/elasticsearch/index_config.yaml index dfae04dd97..e424e92a0b 100644 --- a/admin/searchengine/elasticsearch/index_config.yaml +++ b/admin/searchengine/elasticsearch/index_config.yaml @@ -39,6 +39,6 @@ index: replacement: '' facet: type: pattern_replace - pattern: '\s*(?