From fc229c123ea5701d42bba152daac8cff70166eef Mon Sep 17 00:00:00 2001 From: Martin Renvoize Date: Wed, 1 Sep 2021 15:58:04 +0100 Subject: [PATCH] Bug 28316: (QA follow-up) Make clean_search_term public With all the work that's gone into improving the internal _clean_search_term method I feel we should expose it publically as it's going to be more widely helpful Signed-off-by: Martin Renvoize Signed-off-by: Martin Renvoize Signed-off-by: Jonathan Druart Signed-off-by: Kyle M Hall (cherry picked from commit 641e8e4096b8d55afb534d593743154807f77221) Signed-off-by: Victor Grousset/tuxayo --- .../Elasticsearch/QueryBuilder.pm | 12 ++--- .../SearchEngine/Elasticsearch/QueryBuilder.t | 48 +++++++++---------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 150f67ff7c..a7cb7346d5 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -292,7 +292,7 @@ sub build_query_compat { my $ea = each_array( @$operands, @$operators, @index_params ); while ( my ( $oand, $otor, $index ) = $ea->() ) { next if ( !defined($oand) || $oand eq '' ); - $oand = $self->_clean_search_term($oand); + $oand = $self->clean_search_term($oand); $oand = $self->_truncate_terms($oand) if ($truncate); push @search_params, { operand => $oand, # the search terms @@ -445,7 +445,7 @@ sub build_authorities_query { my @tokens = $self->_split_query( $val ); foreach my $token ( @tokens ) { $token = $self->_truncate_terms( - $self->_clean_search_term( $token ) + $self->clean_search_term( $token ) ); } my $query = $self->_join_queries( @tokens ); @@ -644,7 +644,7 @@ sub _build_scan_query { terms => { field => $index . '__facet', order => { '_term' => 'asc' }, - include => $self->_create_regex_filter($self->_clean_search_term($term)) . '.*' + include => $self->_create_regex_filter($self->clean_search_term($term)) . '.*' } } }; @@ -909,9 +909,9 @@ sub _create_query_string { } @queries; } -=head2 _clean_search_term +=head2 clean_search_term - my $term = $self->_clean_search_term($term); + my $term = $self->clean_search_term($term); This cleans a search term by removing any funny characters that may upset ES and give us an error. It also calls L<_convert_index_strings_freeform> @@ -919,7 +919,7 @@ to ensure those parts are correct. =cut -sub _clean_search_term { +sub clean_search_term { my ( $self, $term ) = @_; # Lookahead for checking if we are inside quotes diff --git a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t index e9a1aa30ec..2a2e950867 100755 --- a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t +++ b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t @@ -186,7 +186,7 @@ subtest '_split_query() tests' => sub { is_deeply(\@res, \@exp, 'quoted search terms surrounded by spaces correctly'); }; -subtest '_clean_search_term() tests' => sub { +subtest 'clean_search_term() tests' => sub { plan tests => 24; my $qb; @@ -197,77 +197,77 @@ subtest '_clean_search_term() tests' => sub { t::lib::Mocks::mock_preference('QueryAutoTruncate', 0); - my $res = $qb->_clean_search_term('an=123'); + my $res = $qb->clean_search_term('an=123'); is($res, 'koha-auth-number:123', 'equals sign replaced with colon'); - $res = $qb->_clean_search_term('"balanced quotes"'); + $res = $qb->clean_search_term('"balanced quotes"'); is($res, '"balanced quotes"', 'balanced quotes returned correctly'); - $res = $qb->_clean_search_term('unbalanced quotes"'); + $res = $qb->clean_search_term('unbalanced quotes"'); is($res, 'unbalanced quotes ', 'unbalanced quotes removed'); - $res = $qb->_clean_search_term('"unbalanced "quotes"'); + $res = $qb->clean_search_term('"unbalanced "quotes"'); is($res, ' unbalanced quotes ', 'unbalanced quotes removed'); - $res = $qb->_clean_search_term(':test query'); + $res = $qb->clean_search_term(':test query'); is($res, 'test query', 'remove colon at the start'); - $res = $qb->_clean_search_term('test query\:'); + $res = $qb->clean_search_term('test query\:'); is($res, 'test query', 'remove colon at the end'); - $res = $qb->_clean_search_term('test : query'); + $res = $qb->clean_search_term('test : query'); is($res, 'test query', 'dangling colon removed'); - $res = $qb->_clean_search_term('test :: query'); + $res = $qb->clean_search_term('test :: query'); is($res, 'test query', 'dangling double colon removed'); - $res = $qb->_clean_search_term('test "another : query"'); + $res = $qb->clean_search_term('test "another : query"'); is($res, 'test "another : query"', 'quoted dangling colon not removed'); - $res = $qb->_clean_search_term('host-item:test:n'); + $res = $qb->clean_search_term('host-item:test:n'); is($res, 'host-item:test\:n', 'screen colons properly'); - $res = $qb->_clean_search_term('host-item:test:n:test:and more'); + $res = $qb->clean_search_term('host-item:test:n:test:and more'); is($res, 'host-item:test\:n\:test\:and more', 'screen multiple colons properly'); - $res = $qb->_clean_search_term('host-item:te st:n'); + $res = $qb->clean_search_term('host-item:te st:n'); is($res, 'host-item:te st:n', 'leave colons as they are'); - $res = $qb->_clean_search_term('test!'); + $res = $qb->clean_search_term('test!'); is($res, 'test', 'remove exclamation sign at the end of the line'); - $res = $qb->_clean_search_term('test! and more'); + $res = $qb->clean_search_term('test! and more'); is($res, 'test and more', 'remove exclamation sign at with space after it'); - $res = $qb->_clean_search_term('!test'); + $res = $qb->clean_search_term('!test'); is($res, '!test', 'exclamation sign left untouched'); - $res = $qb->_clean_search_term('test [123 TO 345]'); + $res = $qb->clean_search_term('test [123 TO 345]'); is($res, 'test [123 TO 345]', 'keep inculsive range untouched'); - $res = $qb->_clean_search_term('test [test TO TEST} [and] {123 TO 456]'); + $res = $qb->clean_search_term('test [test TO TEST} [and] {123 TO 456]'); is($res, 'test [test TO TEST} \[and\] {123 TO 456]', 'keep exclusive range untouched'); - $res = $qb->_clean_search_term('test [test TO TEST} ["[and] {123 TO 456]" "[balanced]"]'); + $res = $qb->clean_search_term('test [test TO TEST} ["[and] {123 TO 456]" "[balanced]"]'); is($res, 'test [test TO TEST} \["[and] {123 TO 456]" "[balanced]"\]', 'keep exclusive range untouched'); - $res = $qb->_clean_search_term('test[]test TO TEST] [ {123 to 345}}'); + $res = $qb->clean_search_term('test[]test TO TEST] [ {123 to 345}}'); is($res, 'test\[\]test TO TEST\] \[ \{123 to 345\}\}', 'screen all square and curly brackets'); t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'escape'); - $res = $qb->_clean_search_term('test inside regexps /this [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); + $res = $qb->clean_search_term('test inside regexps /this [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); is($res, 'test inside regexps \/this \[a-z\]\/ and \/not \[a-z\]\/ and that \[a-z\] [a TO z]', 'behaviour with QueryRegexEscapeOptions set to "escape"'); t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'dont_escape'); - $res = $qb->_clean_search_term('test inside regexps /this [a-z]/ /this2 [a-z]/ [but] /this3 [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); + $res = $qb->clean_search_term('test inside regexps /this [a-z]/ /this2 [a-z]/ [but] /this3 [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); is($res, 'test inside regexps /this [a-z]/ /this2 [a-z]/ \[but\] /this3 [a-z]/ and \/not \[a-z\]\/ and that \[a-z\] [a TO z]', 'behaviour with QueryRegexEscapeOptions set to "dont_escape"'); - $res = $qb->_clean_search_term('ti:test AND kw:test'); + $res = $qb->clean_search_term('ti:test AND kw:test'); is($res, 'title:test AND test', 'ti converted to title, kw converted to empty string, dangling colon removed with space preserved'); - $res = $qb->_clean_search_term('kw:test'); + $res = $qb->clean_search_term('kw:test'); is($res, 'test', 'kw converted to empty string, dangling colon is removed'); }; -- 2.39.5