From dd98584c8572b85afbd68ff98423605df9b18822 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Thu, 31 Jan 2019 13:43:00 +0200 Subject: [PATCH] Bug 22228: Escape lone colons and make sure term is balanced MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Removes unquoted colons that have whitespace on either side. If the double quotes in the term are unbalanced, replaces all of them with spaces. Test plan: 1. Make sure the test case described in the bug works 2. Make sure tests pass: prove t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t Signed-off-by: Björn Nylén Signed-off-by: Josef Moravec Signed-off-by: Nick Clemens --- .../Elasticsearch/QueryBuilder.pm | 15 +++++++ .../SearchEngine/Elasticsearch/QueryBuilder.t | 41 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index c65a4b56dd..252416cbf8 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -803,11 +803,26 @@ to ensure those parts are correct. sub _clean_search_term { my ( $self, $term ) = @_; + # Lookahead for checking if we are inside quotes + my $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)'; + # Some hardcoded searches (like with authorities) produce things like # 'an=123', when it ought to be 'an:123' for our purposes. 
$term =~ s/=/:/g; + $term = $self->_convert_index_strings_freeform($term); $term =~ s/[{}]/"/g; + + # Remove unbalanced quotes + my $unquoted = $term; + my $count = ($unquoted =~ tr/"/ /); + if ($count % 2 == 1) { + $term = $unquoted; + } + + # Remove unquoted colons that have whitespace on either side of them + $term =~ s/(\:[:\s]+|[:\s]+:)$lookahead//g; + return $term; } diff --git a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t index 1c7f73e3f5..4a901a4e84 100644 --- a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t +++ b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t @@ -17,7 +17,7 @@ use Modern::Perl; -use Test::More tests => 2; +use Test::More tests => 3; use Koha::SearchEngine::Elasticsearch::QueryBuilder; @@ -83,4 +83,41 @@ subtest '_split_query() tests' => sub { is_deeply(\@res, \@exp, 'quoted search terms surrounded by spaces correctly'); }; -1; \ No newline at end of file +subtest '_clean_search_term() tests' => sub { + plan tests => 10; + + my $qb; + ok( + $qb = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ 'index' => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX }), + 'Creating a new QueryBuilder object' + ); + + my $res = $qb->_clean_search_term('an=123'); + is($res, 'an:123', 'equals sign replaced with colon'); + + $res = $qb->_clean_search_term('"balanced quotes"'); + is($res, '"balanced quotes"', 'balanced quotes returned correctly'); + + $res = $qb->_clean_search_term('unbalanced quotes"'); + is($res, 'unbalanced quotes ', 'unbalanced quotes removed'); + + $res = $qb->_clean_search_term('"unbalanced "quotes"'); + is($res, ' unbalanced quotes ', 'unbalanced quotes removed'); + + $res = $qb->_clean_search_term('test : query'); + is($res, 'test query', 'dangling colon removed'); + + $res = $qb->_clean_search_term('test :: query'); + is($res, 'test query', 'dangling double colon removed'); + + $res = $qb->_clean_search_term('test "another : query"'); + is($res, 'test "another : 
query"', 'quoted dangling colon not removed'); + + $res = $qb->_clean_search_term('test {another part}'); + is($res, 'test "another part"', 'curly brackets replaced correctly'); + + $res = $qb->_clean_search_term('test {another part'); + is($res, 'test another part', 'unbalanced curly brackets replaced correctly'); +}; + +1; -- 2.39.5