From cc988b2d16cc0399a979d3ce491e29084a0d1166 Mon Sep 17 00:00:00 2001 From: Petro Vashchuk Date: Mon, 6 Sep 2021 16:46:45 +0300 Subject: [PATCH] Bug 28316: escape ES ranges if QueryAutoTruncate is enabled If QueryAutoTruncate is enabled, any special operators get ruined by the automatic truncation; for example, "test [6 TO 7]" will be converted to "test* [6* TO* 7]", so there is no reason to keep ranges when QueryAutoTruncate is set to "enabled". 1) Enable QueryAutoTruncate in your sysprefs. 2) Perform a search using a range, for example "[1999 TO 2020]"; it will not work the way it's supposed to. 3) Apply the patch. 4) Perform the same search with the range and ensure that it works correctly. Signed-off-by: Alex Buckley Signed-off-by: Martin Renvoize Signed-off-by: Martin Renvoize Signed-off-by: Jonathan Druart --- .../Elasticsearch/QueryBuilder.pm | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 8cf45ab339..308aab09f8 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -939,7 +939,28 @@ sub _clean_search_term { } $term = $self->_query_regex_escape_process($term); + # because of _truncate_terms and if QueryAutoTruncate enabled + # we will have any special operators ruined by _truncate_terms: + # for ex. search for "test [6 TO 7]" will be converted to "test* [6* TO* 7]" + # so no reason to keep ranges in QueryAutoTruncate==true case: + my $truncate = C4::Context->preference("QueryAutoTruncate") || 0; + unless($truncate) { + # replace all ranges with any square/curly brackets combinations to temporary substitutions (ex: "{a TO b]"" -> "~~LC~~a TO b~~RS~~") + # (where L is for left and C is for Curly and so on) + $term =~ s/ + (?<!\\) + (?<backslashes>(?:[\\]{2})*) + (?<leftbracket>\{|\[) + (?<ranges> + [^\s\[\]\{\}]+\ TO\ [^\s\[\]\{\}]+ + (?<!\\) + (?:[\\]{2})* + ) + (?<rightbracket>\}|\]) + /$+{backslashes}.'~~L'.($+{leftbracket} eq '[' ? 'S':'C').'~~'.$+{ranges}.'~~R'.($+{rightbracket} eq ']' ? 
'S':'C').'~~'/gex; + } # save all regex contents away before escaping brackets: + # (same trick as with brackets above, just RE for 'RegularExpression') my @saved_regexes; my $rgx_i = 0; while( @@ -974,6 +995,10 @@ sub _clean_search_term { for (my $i = 0; $i < @saved_regexes; $i++) { $term =~ s/~~RE$i~~/$saved_regexes[$i]/; } + unless($truncate) { + # restore temporary weird substitutions back to normal brackets + $term =~ s/~~L(C|S)~~([^\s\[\]\{\}]+ TO [^\s\[\]\{\}]+)~~R(C|S)~~/($1 eq 'S' ? '[':'{').$2.($3 eq 'S' ? ']':'}')/ge; + } return $term; } -- 2.39.5