From 4c4d982ae45a3763fd70faedf1bbac225213de19 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Mon, 3 Apr 2017 14:30:44 -0400 Subject: [PATCH] Bug 18374: Respect QueryAutoTruncate syspref in Elasticsearch This patchset adds a subroutine '_truncate_terms' to the ES QueryBuilder. If QueryAutoTruncate is enabled this function will be called for any search to add wildcard '*' to all terms. To test: 1 - Enable Elasticsearch and have some records indexed 2 - Search for partial terms 3 - Note they fail unless '*' is appended 4 - Apply patch, leave QueryAutoTruncate disabled 5 - Note partial term searches still fail 6 - Enable QueryAutoTruncate 7 - Note partial term searches succeed 8 - Do some regular and advanced searches to make sure results are as expected Signed-off-by: Tomas Cohen Arazi Signed-off-by: Julian Maurice Signed-off-by: Jonathan Druart --- .../Elasticsearch/QueryBuilder.pm | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 4169211857..9e8210982c 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -715,11 +715,14 @@ to ensure those parts are correct. sub _clean_search_term { my ( $self, $term ) = @_; + my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; + # Some hardcoded searches (like with authorities) produce things like # 'an=123', when it ought to be 'an:123' for our purposes. $term =~ s/=/:/g; $term = $self->_convert_index_strings_freeform($term); $term =~ s/[{}]/"/g; + $term = $self->_truncate_terms($term) if ( $auto_truncation ); return $term; } @@ -780,4 +783,27 @@ sub _sort_field { return $f; } +=head2 _truncate_terms + + my $query = $self->_truncate_terms($query); + +Given a string query this function appends '*' wildcard to all terms except +operators. 
+
+=cut
+
+sub _truncate_terms {
+    my ( $self, $query ) = @_;
+    my %is_operator = map { $_ => 1 } qw/and or not/;
+    my @new_terms;
+    # Tokens are whitespace-delimited, but a "quoted phrase" stays in one piece.
+    foreach my $token ( ( defined $query ? $query : '' ) =~ /((?:"[^"]*"|[^\s"]+|")+)/g ) {
+        # Grouping parentheses are kept, outside of the term being truncated.
+        my ( $open, $term, $close ) = $token =~ /\A(\(*)(.*?)(\)*)\z/s;
+        $term .= '*' unless $term eq '' || $is_operator{ lc $term } || $term =~ /[*"]\z/;
+        push @new_terms, $open . $term . $close;
+    }
+    return join ' ', @new_terms;
+}
+
 1;
-- 
2.39.5