Browse Source

Bug 17661: (follow-up) Update regex to support Unicode characters

Rather than limiting initials to [A-Z] we should test for a broad
range of uppercase letters.

The ES/Zebra changes are slightly different because of Perl vs Java regex
conventions. Perl may support either, but I found 'Uppercase' to be a bit more explicit.

More info here:
https://perldoc.perl.org/perlunicode.html

To test:
Same plan as before but use Ж. as the ending initial
Confirm the period is preserved and other punctuation removed

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
20.11.x
Nick Clemens 4 years ago
committed by Jonathan Druart
parent
commit
224ac84aec
  1. 4
      C4/Search.pm
  2. 2
      admin/searchengine/elasticsearch/index_config.yaml

4
C4/Search.pm

@ -680,7 +680,7 @@ sub _get_facets_data_from_record {
next if $field->indicator(1) eq 'z';
my $data = $field->as_string( $subfield_letters, $facet->{ sep } );
$data =~ s/\s*(?<![A-Z])[.\-,;]*\s*$//;
$data =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
unless ( grep { $_ eq $data } @used_datas ) {
push @used_datas, $data;
@ -779,7 +779,7 @@ sub _get_facet_from_result_set {
my $facets = {};
foreach my $term ( @terms ) {
my $facet_value = $term->textContent;
$facet_value =~ s/\s*(?<![A-Z])[.\-,;]*\s*$//;
$facet_value =~ s/\s*(?<!\p{Uppercase})[.\-,;]*\s*$//;
$facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep;
$facets->{ $facet_value } = ( defined $facets->{$facet_value} ) ? $facets->{ $facet_value } + $term->getAttribute( 'occur' ) : $term->getAttribute( 'occur' );
}

2
admin/searchengine/elasticsearch/index_config.yaml

@ -39,6 +39,6 @@ index:
replacement: ''
facet:
type: pattern_replace
pattern: '\s*(?<![A-Z])[.\-,;]*\s*$'
pattern: '\s*(?<!\p{Lu})[.\-,;]*\s*$'
replacement: ''
index.mapping.total_fields.limit: 10000

Loading…
Cancel
Save