From d099dc13bb4b9aa991caa0feaf24bed59c3c4844 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Fri, 6 Oct 2017 15:37:38 +0000 Subject: [PATCH] Bug 18374: (follow-up) Add support and tests for quoted strings To test: 1 - prove t/db_dependent/Koha_SearchEngine_Elasticsearch_Search.t 2 - do some searches in staff client and test results Signed-off-by: Julian Maurice Signed-off-by: Jonathan Druart --- .../Elasticsearch/QueryBuilder.pm | 26 ++++++++++--- .../Koha_SearchEngine_Elasticsearch_Search.t | 38 ++++++++++++++++++- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 5635c85064..d7a83e272e 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -788,7 +788,7 @@ sub _sort_field { my $query = $self->_truncate_terms($query); Given a string query this function appends '*' wildcard to all terms except -operands. +operands and double quoted strings. =cut @@ -796,11 +796,25 @@ sub _truncate_terms { my ( $self, $query ) = @_; my @stops = qw/and or not/; my @new_terms; - my @split_query = split /[\(\s\)]/, $query; - foreach my $term (@split_query) { - next if ( $term eq '' || $term eq ' ' ); - $term .= "*" unless ( ( grep { lc($term) =~ /^$_$/ } @stops ) || ( $term =~ /\*$/ ) ); - push @new_terms, $term; + my @quote_split = split /(["])([^"]+)\1/, $query; + #Above splits the string based on matching pairs of double quotes + #In practice we get ('','"','donald duck',' ','"','the mouse',' and pete') + #given the string '"donald duck" "the mouse" and pete' + #so we ignore empties, quote the ones after a '"' and split the rest on spaces + for (my $i=0; $i < @quote_split; $i++ ) { + next if ( $quote_split[$i] eq '' || $quote_split[$i] eq ' ' ); + if ( $quote_split[$i] eq '"' ){ + $i++; + $quote_split[$i] = '"'.$quote_split[$i].'"'; + push @new_terms, $quote_split[$i] + } else { + my @space_split = split /[\(\s\)]/, $quote_split[$i]; + foreach my $term (@space_split) { + next if ( $term eq '' || $term eq ' ' ); + $term .= "*" unless ( ( grep { lc($term) =~ /^$_$/ } @stops ) || ( $term =~ /\*$/ ) ); + push @new_terms, $term; + } + } } $query = join ' ', @new_terms; return $query; diff --git a/t/db_dependent/Koha_SearchEngine_Elasticsearch_Search.t b/t/db_dependent/Koha_SearchEngine_Elasticsearch_Search.t index f547fcbc17..b54a5f0401 100644 --- a/t/db_dependent/Koha_SearchEngine_Elasticsearch_Search.t +++ b/t/db_dependent/Koha_SearchEngine_Elasticsearch_Search.t @@ -81,7 +81,7 @@ subtest 'json2marc' => sub { }; subtest 'build_query tests' => sub { - plan tests => 10; + plan tests => 15; t::lib::Mocks::mock_preference('DisplayLibraryFacets','both'); my $query = $builder->build_query(); @@ -135,5 +135,41 @@ subtest 'build_query tests' => sub { "(donald* duck*)", "query with '*' is unaltered when QueryAutoTruncate is enabled" ); + + ( undef, $query ) = $builder->build_query_compat( undef, ['donald duck and the mouse'] ); + is( + $query->{query}{query_string}{query}, + "(donald* duck* and the* mouse*)", + "individual words are all truncated and stopwords ignored" + ); + + ( undef, $query ) = $builder->build_query_compat( undef, ['*'] ); + is( + $query->{query}{query_string}{query}, + "(*)", + "query of just '*' is unaltered when QueryAutoTruncate is enabled" + ); + + ( undef, $query ) = $builder->build_query_compat( undef, ['"donald duck"'] ); + is( + $query->{query}{query_string}{query}, + '("donald duck")', + "query with quotes is unaltered when QueryAutoTruncate is enabled" + ); + + + ( undef, $query ) = $builder->build_query_compat( undef, ['"donald duck" and "the mouse"'] ); + is( + $query->{query}{query_string}{query}, + '("donald duck" and "the mouse")', + "all quoted strings are unaltered if more than one in query" + ); + + ( undef, $query ) = $builder->build_query_compat( undef, ['barcode:123456'] ); + is( + $query->{query}{query_string}{query}, + '(barcode:123456*)', + "query of specific field is truncated" + ); }; -- 2.39.5