From ce1a2f9159981780633466283a7d071f5d791983 Mon Sep 17 00:00:00 2001 From: Victor Grousset/tuxayo Date: Wed, 22 Dec 2021 20:47:24 +0100 Subject: [PATCH] Bug 28316: REVERT ALL: due to string freeze The follow-up bug 29284 introduces a new string. Both patchsets will be backported for next release. Signed-off-by: Victor Grousset/tuxayo --- .../Elasticsearch/QueryBuilder.pm | 79 +++---------------- .../SearchEngine/Elasticsearch/QueryBuilder.t | 78 +++++------------- 2 files changed, 31 insertions(+), 126 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index a7cb7346d5..a894782c18 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -292,7 +292,7 @@ sub build_query_compat { my $ea = each_array( @$operands, @$operators, @index_params ); while ( my ( $oand, $otor, $index ) = $ea->() ) { next if ( !defined($oand) || $oand eq '' ); - $oand = $self->clean_search_term($oand); + $oand = $self->_clean_search_term($oand); $oand = $self->_truncate_terms($oand) if ($truncate); push @search_params, { operand => $oand, # the search terms @@ -445,7 +445,7 @@ sub build_authorities_query { my @tokens = $self->_split_query( $val ); foreach my $token ( @tokens ) { $token = $self->_truncate_terms( - $self->clean_search_term( $token ) + $self->_clean_search_term( $token ) ); } my $query = $self->_join_queries( @tokens ); @@ -644,7 +644,7 @@ sub _build_scan_query { terms => { field => $index . '__facet', order => { '_term' => 'asc' }, - include => $self->_create_regex_filter($self->clean_search_term($term)) . '.*' + include => $self->_create_regex_filter($self->_clean_search_term($term)) . '.*' } } }; @@ -909,9 +909,9 @@ sub _create_query_string { } @queries; } -=head2 clean_search_term +=head2 _clean_search_term - my $term = $self->clean_search_term($term); + my $term = $self->_clean_search_term($term); This cleans a search term by removing any funny characters that may upset ES and give us an error. It also calls L<_convert_index_strings_freeform> @@ -919,7 +919,7 @@ to ensure those parts are correct. =cut -sub clean_search_term { +sub _clean_search_term { my ( $self, $term ) = @_; # Lookahead for checking if we are inside quotes @@ -930,6 +930,7 @@ sub clean_search_term { $term =~ s/=/:/g; $term = $self->_convert_index_strings_freeform($term); + $term =~ s/[{}]/"/g; # Remove unbalanced quotes my $unquoted = $term; @@ -937,68 +938,14 @@ sub clean_search_term { if ($count % 2 == 1) { $term = $unquoted; } - $term = $self->_query_regex_escape_process($term); - # because of _truncate_terms and if QueryAutoTruncate enabled - # we will have any special operators ruined by _truncate_terms: - # for ex. search for "test [6 TO 7]" will be converted to "test* [6* TO* 7]" - # so no reason to keep ranges in QueryAutoTruncate==true case: - my $truncate = C4::Context->preference("QueryAutoTruncate") || 0; - unless($truncate) { - # replace all ranges with any square/curly brackets combinations to temporary substitutions (ex: "{a TO b]"" -> "~~LC~~a TO b~~RS~~") - # (where L is for left and C is for Curly and so on) - $term =~ s/ - (?(?:[\\]{2})*) - (?\{|\[) - (? - [^\s\[\]\{\}]+\ TO\ [^\s\[\]\{\}]+ - (?\}|\]) - /$+{backslashes}.'~~L'.($+{leftbracket} eq '[' ? 'S':'C').'~~'.$+{ranges}.'~~R'.($+{rightbracket} eq ']' ? 'S':'C').'~~'/gex; - } - # save all regex contents away before escaping brackets: - # (same trick as with brackets above, just RE for 'RegularExpression') - my @saved_regexes; - my $rgx_i = 0; - while( - $term =~ s@( - (?_query_regex_escape_process($term); - # remove leading and trailing colons mixed with optional slashes and spaces - $term =~ s/^([\s\\]*:\s*)+//; - $term =~ s/([\s\\]*:\s*)+$//; - # remove unquoted colons that have whitespace on either side of them - $term =~ s/([\s\\]*:\s*)+(\s+)$lookahead/$2/g; - $term =~ s/(\s+)([\s\\]*:\s*)+$lookahead/$1/g; - # replace with spaces all repeated colons no matter how they surrounded with spaces and slashes - $term =~ s/([\s\\]*:\s*){2,}$lookahead/ /g; - # screen all followups for colons after first colon, - # and correctly ignore unevenly backslashed: - $term =~ s/((? sub { is_deeply(\@res, \@exp, 'quoted search terms surrounded by spaces correctly'); }; -subtest 'clean_search_term() tests' => sub { - plan tests => 24; +subtest '_clean_search_term() tests' => sub { + plan tests => 12; my $qb; ok( @@ -195,80 +195,38 @@ subtest 'clean_search_term() tests' => sub { 'Creating a new QueryBuilder object' ); - t::lib::Mocks::mock_preference('QueryAutoTruncate', 0); - - my $res = $qb->clean_search_term('an=123'); + my $res = $qb->_clean_search_term('an=123'); is($res, 'koha-auth-number:123', 'equals sign replaced with colon'); - $res = $qb->clean_search_term('"balanced quotes"'); + $res = $qb->_clean_search_term('"balanced quotes"'); is($res, '"balanced quotes"', 'balanced quotes returned correctly'); - $res = $qb->clean_search_term('unbalanced quotes"'); + $res = $qb->_clean_search_term('unbalanced quotes"'); is($res, 'unbalanced quotes ', 'unbalanced quotes removed'); - $res = $qb->clean_search_term('"unbalanced "quotes"'); + $res = $qb->_clean_search_term('"unbalanced "quotes"'); is($res, ' unbalanced quotes ', 'unbalanced quotes removed'); - $res = $qb->clean_search_term(':test query'); - is($res, 'test query', 'remove colon at the start'); - - $res = $qb->clean_search_term('test query\:'); - is($res, 'test query', 'remove colon at the end'); + $res = $qb->_clean_search_term('test : query'); + is($res, 'test query', 'dangling colon removed'); - $res = $qb->clean_search_term('test : query'); - is($res, 'test query', 'dangling colon removed'); + $res = $qb->_clean_search_term('test :: query'); + is($res, 'test query', 'dangling double colon removed'); - $res = $qb->clean_search_term('test :: query'); - is($res, 'test query', 'dangling double colon removed'); - - $res = $qb->clean_search_term('test "another : query"'); + $res = $qb->_clean_search_term('test "another : query"'); is($res, 'test "another : query"', 'quoted dangling colon not removed'); - $res = $qb->clean_search_term('host-item:test:n'); - is($res, 'host-item:test\:n', 'screen colons properly'); - - $res = $qb->clean_search_term('host-item:test:n:test:and more'); - is($res, 'host-item:test\:n\:test\:and more', 'screen multiple colons properly'); - - $res = $qb->clean_search_term('host-item:te st:n'); - is($res, 'host-item:te st:n', 'leave colons as they are'); - - $res = $qb->clean_search_term('test!'); - is($res, 'test', 'remove exclamation sign at the end of the line'); - - $res = $qb->clean_search_term('test! and more'); - is($res, 'test and more', 'remove exclamation sign at with space after it'); - - $res = $qb->clean_search_term('!test'); - is($res, '!test', 'exclamation sign left untouched'); - - $res = $qb->clean_search_term('test [123 TO 345]'); - is($res, 'test [123 TO 345]', 'keep inculsive range untouched'); - - $res = $qb->clean_search_term('test [test TO TEST} [and] {123 TO 456]'); - is($res, 'test [test TO TEST} \[and\] {123 TO 456]', 'keep exclusive range untouched'); - - $res = $qb->clean_search_term('test [test TO TEST} ["[and] {123 TO 456]" "[balanced]"]'); - is($res, 'test [test TO TEST} \["[and] {123 TO 456]" "[balanced]"\]', 'keep exclusive range untouched'); - - $res = $qb->clean_search_term('test[]test TO TEST] [ {123 to 345}}'); - is($res, 'test\[\]test TO TEST\] \[ \{123 to 345\}\}', 'screen all square and curly brackets'); - - t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'escape'); - - $res = $qb->clean_search_term('test inside regexps /this [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); - is($res, 'test inside regexps \/this \[a-z\]\/ and \/not \[a-z\]\/ and that \[a-z\] [a TO z]', 'behaviour with QueryRegexEscapeOptions set to "escape"'); - - t::lib::Mocks::mock_preference('QueryRegexEscapeOptions', 'dont_escape'); + $res = $qb->_clean_search_term('test {another part}'); + is($res, 'test "another part"', 'curly brackets replaced correctly'); - $res = $qb->clean_search_term('test inside regexps /this [a-z]/ /this2 [a-z]/ [but] /this3 [a-z]/ and \/not [a-z]\/ and that [a-z] [a TO z]'); - is($res, 'test inside regexps /this [a-z]/ /this2 [a-z]/ \[but\] /this3 [a-z]/ and \/not \[a-z\]\/ and that \[a-z\] [a TO z]', 'behaviour with QueryRegexEscapeOptions set to "dont_escape"'); + $res = $qb->_clean_search_term('test {another part'); + is($res, 'test another part', 'unbalanced curly brackets replaced correctly'); - $res = $qb->clean_search_term('ti:test AND kw:test'); + $res = $qb->_clean_search_term('ti:test AND kw:test'); is($res, 'title:test AND test', 'ti converted to title, kw converted to empty string, dangling colon removed with space preserved'); - $res = $qb->clean_search_term('kw:test'); - is($res, 'test', 'kw converted to empty string, dangling colon is removed'); + $res = $qb->_clean_search_term('kw:test'); + is($res, 'test', 'kw converted to empty string, dangling colon removed with space preserved'); }; subtest '_join_queries' => sub { -- 2.39.5