Browse Source

Bug 19604: Elasticsearch Fixes for build_authorities_query for auth searching

To test:
1 - Do some authority searches in Zebra
2 - Switch to ES and repeat, results will vary and some may fail
3 - Apply patch and dependencies
4 - Reindex ES
5 - Repeat searches, they should succeed and results should be similar to
Zebra
6 - Slight differences are okay, but results should (mostly) meet
expectations

A few notes:
We add a 'normalizer' to ensure we get a single token from the heading
indexes; this makes 'starts with' work as expected
We switch to 'AND' for fields searched from cataloging editor - this
matches Zebra results
We force the '__sort' fields for sorting - if sorting looks wrong try
reducing the heading field to a single subfield - this will need to be
addressed in a future bug (multiple subfields create an array, and ES
sorts those randomly)

Signed-off-by: Nicolas Legrand <nicolas.legrand@bulac.fr>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
18.11.x
Nick Clemens 4 years ago
parent
commit
f983c1e3f7
  1. 55
      Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
  2. 3
      admin/searchengine/elasticsearch/field_config.yaml
  3. 3
      admin/searchengine/elasticsearch/index_config.yaml

55
Koha/SearchEngine/Elasticsearch/QueryBuilder.pm

@ -290,40 +290,46 @@ sub build_authorities_query {
# Start by making the query parts
my @query_parts;
my @filter_parts;
foreach my $s ( @{ $search->{searches} } ) {
my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
$wh = '_all' if $wh eq '';
if ( $op eq 'is' || $op eq '=' ) {
# look for something that matches completely
# look for something that matches a term completely
# note, '=' is about numerical vals. May need special handling.
# _allphrase is a special field that only groups the exact
# matches. Also, we lowercase our search because the ES
# Also, we lowercase our search because the ES
# index lowercases its values, and term searches don't get the
# search analyzer applied to them.
push @filter_parts, { term => { "$wh.phrase" => lc $val } };
push @query_parts, { term => {"$wh.phrase" => lc $val} };
}
elsif ( $op eq 'exact' ) {
# left and right truncation, otherwise an exact phrase
push @query_parts, { match_phrase => { $wh => $val } };
push @query_parts, { match_phrase => {"$wh.phrase" => lc $val} };
}
elsif ( $op eq 'start' ) {
# startswith search
push @query_parts, { wildcard => { "$wh.phrase" => lc "$val*" } };
# startswith search, uses lowercase untokenized version of heading
push @query_parts, { prefix => {"$wh.lc_raw" => lc $val} };
}
else {
# regular wordlist stuff
push @query_parts, { match => { $wh => $val } };
# push @query_parts, { match => {$wh => { query => $val, operator => 'and' }} };
my @values = split(' ',$val);
foreach $val (@values) {
push @query_parts, { wildcard => { "$wh.phrase" => "*" . lc $val . "*" } };
}
}
}
# Merge the query and filter parts appropriately
# 'should' behaves like 'or', if we want 'and', use 'must'
my $query_part = { bool => { should => \@query_parts } };
my $filter_part = { bool => { should => \@filter_parts } };
# Merge the query parts appropriately
# 'should' behaves like 'or'
# 'must' behaves like 'and'
# Zebra results seem to match must so using that here
my $query = { query=>
{ bool =>
{ must => \@query_parts }
}
};
# We need to add '.phrase' to all the sort headings otherwise it'll sort
# based on the tokenised form.
@ -336,20 +342,9 @@ sub build_authorities_query {
$search->{sort} = \%s;
}
# extract the sort stuff
my %sort;
%sort = ( sort => [ $search->{sort} ] ) if exists $search->{sort};
my $query;
if (@filter_parts) {
$query =
{ query =>
{ filtered => { filter => $filter_part, query => $query_part } }
};
}
else {
$query = { query => $query_part };
}
$query = { %$query, %sort };
# add the sort stuff
$query->{sort} = [ $search->{sort} ] if exists $search->{sort};
return $query;
}
@ -446,7 +441,7 @@ sub build_authorities_query_compat {
my %sort;
my $sort_field =
( $orderby =~ /^Heading/ ) ? 'Heading'
( $orderby =~ /^Heading/ ) ? 'Heading__sort'
: ( $orderby =~ /^Auth/ ) ? 'Local-Number'
: undef;
if ($sort_field) {

3
admin/searchengine/elasticsearch/field_config.yaml

@ -39,6 +39,9 @@ search:
search_analyzer: analyser_phrase
raw:
type: keyword
lc_raw:
type: keyword
normalizer: my_normalizer
copy_to: _all
# Facets
facet:

3
admin/searchengine/elasticsearch/index_config.yaml

@ -25,6 +25,9 @@ index:
type: custom
filter:
- icu_folding
my_normalizer:
type: custom
char_filter: icu_normalizer
char_filter:
# The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer.
punctuation:

Loading…
Cancel
Save