From da16f6bc9185f8ae166d46c8e35f3565e41d6bc0 Mon Sep 17 00:00:00 2001 From: David Gustafsson Date: Fri, 2 Mar 2018 18:16:39 +0100 Subject: [PATCH] Bug 20334: Option for escaping slashes in search queries MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add "QueryRegexEscapeOption" system preference to provide option to escape Elasticsearch regexp delimiters (/) within queries, or alternativly to unescape escaped slashes (\/) while escaping unescaped slashes, in effect making "\/" the new regexp delimiter. How to test: 1) Run tests in ./t/Koha/SearchEngine/ElasticSearch/QueryBuilder.t 2) All tests should succeed Signed-off-by: Nicolas Legrand Signed-off-by: Maksim Sen Signed-off-by: Séverine QUEUNE Signed-off-by: Nick Clemens Signed-off-by: Martin Renvoize --- .../Elasticsearch/QueryBuilder.pm | 34 +++++ ...34-add-syspref-QueryRegexEscapeOptions.sql | 1 + .../modules/admin/preferences/searching.pref | 7 + .../SearchEngine/ElasticSearch/QueryBuilder.t | 125 ++++++++++++++++++ 4 files changed, 167 insertions(+) create mode 100644 installer/data/mysql/atomicupdate/bug_20334-add-syspref-QueryRegexEscapeOptions.sql create mode 100644 t/Koha/SearchEngine/ElasticSearch/QueryBuilder.t diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index f44b422d71..68d09c70f4 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -892,9 +892,43 @@ sub _clean_search_term { # Remove unquoted colons that have whitespace on either side of them $term =~ s/(\:[:\s]+|[:\s]+:)$lookahead//g; + $term = $self->_query_regex_escape_process($term); + return $term; } +=head2 _query_regex_escape_process + + my $query = $self->_query_regex_escape_process($query); + +Processes query in accordance with current "QueryRegexEscapeOptions" system preference setting. + +=cut + +sub _query_regex_escape_process { + my ($self, $query) = @_; + my $regex_escape_options = C4::Context->preference("QueryRegexEscapeOptions"); + if ($regex_escape_options ne 'dont_escape') { + if ($regex_escape_options eq 'escape') { + # Will escape unescaped slashes (/) while preserving + # unescaped slashes within quotes + # @TODO: assumes quotes are always balanced and will + # not handle escaped qoutes properly, should perhaps be + # replaced with a more general parser solution + # so that this function is ever only provided with unqouted + # query parts + $query =~ s@(?:(?_fix_limit_special_cases($limits); diff --git a/installer/data/mysql/atomicupdate/bug_20334-add-syspref-QueryRegexEscapeOptions.sql b/installer/data/mysql/atomicupdate/bug_20334-add-syspref-QueryRegexEscapeOptions.sql new file mode 100644 index 0000000000..7f70795631 --- /dev/null +++ b/installer/data/mysql/atomicupdate/bug_20334-add-syspref-QueryRegexEscapeOptions.sql @@ -0,0 +1 @@ +INSERT IGNORE INTO systempreferences ( `variable`, `value`, `options`, `explanation`, `type`) VALUES ('QueryRegexEscapeOptions', 'dont_escape', 'dont_escape|escape|unescape_escaped', 'Escape option for regexps delimiters in Elasicsearch queries.', 'Choice'); diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref index f1b87d89c9..7dec372c03 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref @@ -29,6 +29,13 @@ Searching: yes: Enable no: Disable - ranking of search results by relevance (REQUIRES ZEBRA). + - + - pref: QueryRegexEscapeOptions + choices: + escape: Escape + unescape_escaped: Unescape escaped + dont_escape: Don't escape + - "regular expressions within query strings. If \"Unescape escaped\" is selected this will allow writing regular expressions \"\/like this\/\" while \"/this/\", \"or/this\" will be escaped and interpreted by Elasticsearch as regular strings." - - pref: OpacGroupResults default: 0 diff --git a/t/Koha/SearchEngine/ElasticSearch/QueryBuilder.t b/t/Koha/SearchEngine/ElasticSearch/QueryBuilder.t new file mode 100644 index 0000000000..73b3bde3ef --- /dev/null +++ b/t/Koha/SearchEngine/ElasticSearch/QueryBuilder.t @@ -0,0 +1,125 @@ +#!/usr/bin/perl +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . + +use Modern::Perl; + +use Test::More tests => 2; +use t::lib::Mocks; + +use_ok('Koha::SearchEngine::Elasticsearch::QueryBuilder'); + +subtest 'query_regex_escape_options' => sub { + plan tests => 12; + + t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'dont_escape'); + + my $query_with_regexp = "query /with regexp/"; + + my $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_regexp); + is( + $processed_query, + $query_with_regexp, + "Unescaped query regexp has not been escaped when escaping is disabled" + ); + + t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'escape'); + + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_regexp); + is( + $processed_query, + "query \\/with regexp\\/", + "Unescaped query regexp has been escaped when escaping is enabled" + ); + + my $query_with_escaped_regex = "query \\/with regexp\\/"; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_escaped_regex); + is( + $processed_query, + $query_with_escaped_regex, + "Escaped query regexp has been left unmodified when escaping is enabled" + ); + + my $query_with_even_preceding_escapes_regex = "query \\\\/with regexp\\\\/"; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_even_preceding_escapes_regex); + is( + $processed_query, + "query \\\\\\/with regexp\\\\\\/", + "Query regexp with even preceding escapes, thus unescaped, has been escaped when escaping is enabled" + ); + + my $query_with_odd_preceding_escapes_regex = 'query \\\\\\/with regexp\\\\\\/'; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_odd_preceding_escapes_regex); + is( + $processed_query, + $query_with_odd_preceding_escapes_regex, + "Query regexp with odd preceding escapes, thus escaped, has been left unmodified when escaping is enabled" + ); + + my $query_with_quoted_slash = "query with / and \"/ within quotes\""; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_quoted_slash); + is( + $processed_query, + "query with \\/ and \"/ within quotes\"", + "Unescaped slash outside of quotes has been escaped while unescaped slash within quotes is left as is when escaping is enabled." + ); + + t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'unescape_escaped'); + + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_regexp); + is( + $processed_query, + "query \\/with regexp\\/", + "Unescaped query regexp has been escaped when unescape escaping is enabled" + ); + + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_escaped_regex); + is( + $processed_query, + "query /with regexp/", + "Escaped query regexp has been unescaped when unescape escaping is enabled" + ); + + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_even_preceding_escapes_regex); + is( + $processed_query, + "query \\\\\\/with regexp\\\\\\/", + "Query regexp with even preceding escapes, thus unescaped, has been escaped when unescape escaping is enabled" + ); + + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_odd_preceding_escapes_regex); + is( + $processed_query, + "query \\\\/with regexp\\\\/", + "Query regexp with odd preceding escapes, thus escaped, has been unescaped when unescape escaping is enabled" + ); + + my $regexp_at_start_of_string_with_odd_preceding_escapes_regex = '\\\\\\/regexp\\\\\\/'; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($regexp_at_start_of_string_with_odd_preceding_escapes_regex); + is( + $processed_query, + "\\\\/regexp\\\\/", + "Regexp at start of string with odd preceding escapes, thus escaped, has been unescaped when unescape escaping is enabled" + ); + + my $query_with_quoted_escaped_slash = "query with \\/ and \"\\/ within quotes\""; + $processed_query = Koha::SearchEngine::Elasticsearch::QueryBuilder->_query_regex_escape_process($query_with_quoted_escaped_slash); + is( + $processed_query, + "query with / and \"\\/ within quotes\"", + "Escaped slash outside of quotes has been unescaped while escaped slash within quotes is left as is when unescape escaping is enabled." + ); +}; -- 2.39.5