From 999f8ba0a8f36b3237e3e18758738bdb3812d31a Mon Sep 17 00:00:00 2001 From: Petro Vashchuk Date: Fri, 18 Jun 2021 10:43:14 +0300 Subject: [PATCH] Bug 28316: escape unquoted colons and all follow-up colons Currently, searches like "book:", ":book" and "host-item:test:n" cause internal server errors. This patch adds additional regexes that remove the colons at the start and end of the query, and another regex that escapes all follow-up colons that come after the first colon, to avoid errors when searching for "host-item:test:n". To reproduce: 1) using ES, search for a book whose title contains a colon at the start or at the end of the line, separated with spaces; this should cause an internal server error. 2) try doing the same with something like "host-item:test:n"; it should result in an error as well. 3) apply the patch. 4) repeat steps 1-2, ensure that it works now. Signed-off-by: Victor Grousset/tuxayo Signed-off-by: Alex Buckley Signed-off-by: Martin Renvoize Signed-off-by: Martin Renvoize Signed-off-by: Jonathan Druart --- .../SearchEngine/Elasticsearch/QueryBuilder.pm | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index cce55f0f8d..6cb4fe0d8e 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -938,14 +938,20 @@ sub _clean_search_term { if ($count % 2 == 1) { $term = $unquoted; } - - # Remove unquoted colons that have whitespace on either side of them - $term =~ s/(:+)(\s+)$lookahead/$2/g; - $term =~ s/(\s+)(:+)$lookahead/$1/g; - $term =~ s/^://; - $term = $self->_query_regex_escape_process($term); + # remove leading and trailing colons mixed with optional slashes and spaces + $term =~ s/^([\s\\]*:\s*)+//; + $term =~ s/([\s\\]*:\s*)+$//; + # remove unquoted colons that have whitespace on either side of them + $term =~ s/([\s\\]*:\s*)+(\s+)$lookahead/$2/g; + 
$term =~ s/(\s+)([\s\\]*:\s*)+$lookahead/$1/g; + # replace with spaces all repeated colons no matter how they surrounded with spaces and slashes + $term =~ s/([\s\\]*:\s*){2,}$lookahead/ /g; + # screen all followups for colons after first colon, + # and correctly ignore unevenly backslashed: + $term =~ s/((?