Koha/SearchEngine/Elasticsearch/QueryBuilder.pm

   1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
   2
   3 # This file is part of Koha.
   4 #
   5 # Copyright 2014 Catalyst IT Ltd.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it
   8 # under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 3 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  19
  20 =head1 NAME
  21
  22 Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
  23 query objects from user-supplied queries
  24
  25 =head1 DESCRIPTION
  26
  27 This provides the functions that take a user-supplied search query, and
  28 provides something that can be given to elasticsearch to get answers.
  29
  30 =head1 SYNOPSIS
  31
  32     use Koha::SearchEngine::Elasticsearch::QueryBuilder;
    my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
  34     my $simple_query = $builder->build_query("hello");
  35     # This is currently undocumented because the original code is undocumented
  36     my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
  37
  38 =head1 METHODS
  39
  40 =cut
  41
  42 use base qw(Koha::SearchEngine::Elasticsearch);
  43 use JSON;
  44 use List::MoreUtils qw( each_array );
  45 use Modern::Perl;
  46 use URI::Escape qw( uri_escape_utf8 );
  47
  48 use C4::Context;
  49 use Koha::Exceptions;
  50 use Koha::Caches;
  51
  52 our %index_field_convert = (
  53     'kw' => '',
  54     'ab' => 'abstract',
  55     'au' => 'author',
  56     'lcn' => 'local-classification',
  57     'callnum' => 'local-classification',
  58     'record-type' => 'rtype',
  59     'mc-rtype' => 'rtype',
  60     'mus' => 'rtype',
  61     'lc-card' => 'lc-card-number',
  62     'sn' => 'local-number',
  63     'biblionumber' => 'local-number',
  64     'yr' => 'date-of-publication',
  65     'pubdate' => 'date-of-publication',
  66     'acqdate' => 'date-of-acquisition',
  67     'date/time-last-modified' => 'date-time-last-modified',
  68     'dtlm' => 'date-time-last-modified',
  69     'diss' => 'dissertation-information',
  70     'nb' => 'isbn',
  71     'ns' => 'issn',
  72     'music-number' => 'identifier-publisher-for-music',
  73     'number-music-publisher' => 'identifier-publisher-for-music',
  74     'music' => 'identifier-publisher-for-music',
  75     'ident' => 'identifier-standard',
  76     'cpn' => 'corporate-name',
  77     'cfn' => 'conference-name',
  78     'pn' => 'personal-name',
  79     'pb' => 'publisher',
  80     'pv' => 'provider',
  81     'nt' => 'note',
  82     'notes' => 'note',
  83     'rcn' => 'record-control-number',
  84     'cni' => 'control-number-identifier',
  85     'su' => 'subject',
  86     'su-to' => 'subject',
  87     #'su-geo' => 'subject',
  88     'su-ut' => 'subject',
  89     'ti' => 'title',
  90     'se' => 'title-series',
  91     'ut' => 'title-uniform',
  92     'an' => 'koha-auth-number',
  93     'authority-number' => 'koha-auth-number',
  94     'at' => 'authtype',
  95     'he' => 'heading',
  96     'rank' => 'relevance',
  97     'phr' => 'st-phrase',
  98     'wrdl' => 'st-word-list',
  99     'rt' => 'right-truncation',
 100     'rtrn' => 'right-truncation',
 101     'ltrn' => 'left-truncation',
 102     'rltrn' => 'left-and-right',
 103     'mc-itemtype' => 'itemtype',
 104     'mc-ccode' => 'ccode',
 105     'branch' => 'homebranch',
 106     'mc-loc' => 'location',
 107     'loc' => 'location',
 108     'stocknumber' => 'number-local-acquisition',
 109     'inv' => 'number-local-acquisition',
 110     'bc' => 'barcode',
 111     'mc-itype' => 'itype',
 112     'aub' => 'author-personal-bibliography',
 113     'auo' => 'author-in-order',
 114     'ff8-22' => 'ta',
 115     'aud' => 'ta',
 116     'audience' => 'ta',
 117     'frequency-code' => 'ff8-18',
 118     'illustration-code' => 'ff8-18-21',
 119     'regularity-code' => 'ff8-19',
 120     'type-of-serial' => 'ff8-21',
 121     'format' => 'ff8-23',
 122     'conference-code' => 'ff8-29',
 123     'festschrift-indicator' => 'ff8-30',
 124     'index-indicator' => 'ff8-31',
 125     'fiction' => 'lf',
 126     'fic' => 'lf',
 127     'literature-code' => 'lf',
 128     'biography' => 'bio',
 129     'ff8-34' => 'bio',
 130     'biography-code' => 'bio',
 131     'l-format' => 'ff7-01-02',
 132     'lex' => 'lexile-number',
 133     'hi' => 'host-item-number',
 134     'itu' => 'index-term-uncontrolled',
 135     'itg' => 'index-term-genre',
 136 );
 137 my $field_name_pattern = '[\w\-]+';
 138 my $multi_field_pattern = "(?:\\.$field_name_pattern)*";
 139
 140 =head2 get_index_field_convert
 141
    my $index_field_convert = Koha::SearchEngine::Elasticsearch::QueryBuilder->get_index_field_convert();

Returns a reference to the hash that maps zebra-style search index names
(for example C<au> or C<ti>) to their elasticsearch-style equivalents (for
example C<author> or C<title>).

B<TODO>: this will pull from the elasticsearch mappings table to figure out
types.

=cut
 151
 152 =cut
 153
 154 sub get_index_field_convert() {
 155     return \%index_field_convert;
 156 }
 157
 158 =head2 build_query
 159
 160     my $simple_query = $builder->build_query("hello", %options)
 161
 162 This will build a query that can be issued to elasticsearch from the provided
 163 string input. This expects a lucene style search form (see
 164 L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
 165 for details.)
 166
 167 It'll make an attempt to respect the various query options.
 168
 169 Additional options can be provided with the C<%options> hash.
 170
 171 =over 4
 172
 173 =item sort
 174
This should be an arrayref of hashrefs, each containing a C<field> and a
C<direction> (optional, defaults to C<asc>). The results will be sorted
according to these values. Valid values for C<direction> are 'asc' and 'desc'.
 178
 179 =back
 180
 181 =cut
 182
 183 sub build_query {
 184     my ( $self, $query, %options ) = @_;
 185
 186     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 187     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 188     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 189
 190     $query = '*' unless defined $query;
 191
 192     my $res;
 193     my $fields = $self->_search_fields({
 194         is_opac => $options{is_opac},
 195         weighted_fields => $options{weighted_fields},
 196     });
 197     if ($options{whole_record}) {
 198         push @$fields, 'marc_data_array.*';
 199     }
 200     $res->{query} = {
 201         query_string => {
 202             query            => $query,
 203             fuzziness        => $fuzzy_enabled ? 'auto' : '0',
 204             default_operator => 'AND',
 205             fields           => $fields,
 206             lenient          => JSON::true,
 207             analyze_wildcard => JSON::true,
 208         }
 209     };
 210     $res->{query}->{query_string}->{type} = 'cross_fields' if C4::Context->preference('ElasticsearchCrossFields');
 211
 212     if ( $options{sort} ) {
 213         foreach my $sort ( @{ $options{sort} } ) {
 214             my ( $f, $d ) = @$sort{qw/ field direction /};
 215             die "Invalid sort direction, $d"
 216               if $d && ( $d ne 'asc' && $d ne 'desc' );
 217             $d = 'asc' unless $d;
 218
 219             $f = $self->_sort_field($f);
 220             push @{ $res->{sort} }, { $f => { order => $d } };
 221         }
 222     }
 223
 224     # See _convert_facets in Search.pm for how these get turned into
 225     # things that Koha can use.
 226     my $size = C4::Context->preference('FacetMaxCount');
 227     $res->{aggregations} = {
 228         author         => { terms => { field => "author__facet" , size => $size } },
 229         subject        => { terms => { field => "subject__facet", size => $size } },
 230         itype          => { terms => { field => "itype__facet", size => $size} },
 231         location       => { terms => { field => "location__facet", size => $size } },
 232         'su-geo'       => { terms => { field => "su-geo__facet", size => $size} },
 233         'title-series' => { terms => { field => "title-series__facet", size => $size } },
 234         ccode          => { terms => { field => "ccode__facet", size => $size } },
 235         ln             => { terms => { field => "ln__facet", size => $size } },
 236     };
 237
 238     my $display_library_facets = C4::Context->preference('DisplayLibraryFacets');
 239     if (   $display_library_facets eq 'both'
 240         or $display_library_facets eq 'home' ) {
 241         $res->{aggregations}{homebranch} = { terms => { field => "homebranch__facet", size => $size } };
 242     }
 243     if (   $display_library_facets eq 'both'
 244         or $display_library_facets eq 'holding' ) {
 245         $res->{aggregations}{holdingbranch} = { terms => { field => "holdingbranch__facet", size => $size } };
 246     }
 247     return $res;
 248 }
 249
 250 =head2 build_query_compat
 251
 252     my (
 253         $error,             $query, $simple_query, $query_cgi,
 254         $query_desc,        $limit, $limit_cgi,    $limit_desc,
 255         $stopwords_removed, $query_type
 256       )
 257       = $builder->build_query_compat( \@operators, \@operands, \@indexes,
 258         \@limits, \@sort_by, $scan, $lang, $params );
 259
 260 This handles a search using the same api as L<C4::Search::buildQuery> does.
 261
A very simple query will go in with C<$operands> set to ['query'], and
C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
C<$query> set to something that can perform the search, C<$simple_query>
set to just the search term, C<$query_cgi> set to something that can
reproduce this search, and C<$query_desc> set to a description of the search
(the search term itself when there is exactly one operand, otherwise the
query string built from the operands).
 267
 268 =cut
 269
 270 sub build_query_compat {
 271     my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
 272         $lang, $params )
 273       = @_;
 274
 275     my $query;
 276     my $query_str = '';
 277     my $search_param_query_str = '';
 278     my $limits = ();
 279     if ( $scan ) {
 280         ($query, $query_str) = $self->_build_scan_query( $operands, $indexes );
 281         $search_param_query_str = $query_str;
 282     } else {
 283         my @sort_params  = $self->_convert_sort_fields(@$sort_by);
 284         my @index_params = $self->_convert_index_fields(@$indexes);
 285         $limits       = $self->_fix_limit_special_cases($orig_limits);
 286         if ( $params->{suppress} ) { push @$limits, "suppress:false"; }
 287         # Merge the indexes in with the search terms and the operands so that
 288         # each search thing is a handy unit.
 289         unshift @$operators, undef;    # The first one can't have an op
 290         my @search_params;
 291         my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
 292         my $ea = each_array( @$operands, @$operators, @index_params );
 293         while ( my ( $oand, $otor, $index ) = $ea->() ) {
 294             next if ( !defined($oand) || $oand eq '' );
 295             $oand = $self->clean_search_term($oand);
 296             $oand = $self->_truncate_terms($oand) if ($truncate);
 297             push @search_params, {
 298                 operand => $oand,      # the search terms
 299                 operator => defined($otor) ? uc $otor : undef,    # AND and so on
 300                 $index ? %$index : (),
 301             };
 302         }
 303
 304         # We build a string query from limits and the queries. An alternative
 305         # would be to pass them separately into build_query and let it build
 306         # them into a structured ES query itself. Maybe later, though that'd be
 307         # more robust.
 308         my @search_param_query_array = $self->_create_query_string(@search_params);
 309         $search_param_query_str = join( ' ', @search_param_query_array );
 310         my $search_param_limit_str =
 311           $self->_join_queries( $self->_convert_index_strings(@$limits) );
 312         if ( @search_param_query_array > 1 && $search_param_limit_str ) {
 313             $search_param_query_str = "($search_param_query_str)";
 314         }
 315         $query_str = join( ' AND ',
 316             $search_param_query_str || (),
 317             $search_param_limit_str || () );
 318
 319         # If there's no query on the left, let's remove the junk left behind
 320         $query_str =~ s/^ AND //;
 321         my %options;
 322         $options{sort} = \@sort_params;
 323         $options{is_opac} = $params->{is_opac};
 324         $options{weighted_fields} = $params->{weighted_fields};
 325         $options{whole_record} = $params->{whole_record};
 326         $query = $self->build_query( $query_str, %options );
 327     }
 328
 329     # We roughly emulate the CGI parameters of the zebra query builder
 330     my $query_cgi = '';
 331     shift @$operators; # Shift out the one we unshifted before
 332     my $ea = each_array( @$operands, @$operators, @$indexes );
 333     while ( my ( $oand, $otor, $index ) = $ea->() ) {
 334         $query_cgi .= '&' if $query_cgi;
 335         $query_cgi .= 'idx=' . uri_escape_utf8( $index // '') . '&q=' . uri_escape_utf8( $oand );
 336         $query_cgi .= '&op=' . uri_escape_utf8( $otor ) if $otor;
 337     }
 338     $query_cgi .= '&scan=1' if ( $scan );
 339
 340     my $simple_query;
 341     $simple_query = $operands->[0] if @$operands == 1;
 342     my $query_desc;
 343     if ( $simple_query ) {
 344         $query_desc = $simple_query;
 345     } else {
 346         $query_desc = $search_param_query_str;
 347     }
 348     my $limit     = $self->_join_queries( $self->_convert_index_strings(@$limits));
 349     my $limit_cgi = ( $orig_limits and @$orig_limits )
 350       ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
 351       : '';
 352     my $limit_desc;
 353     $limit_desc = "$limit" if $limit;
 354
 355     return (
 356         undef,  $query,     $simple_query, $query_cgi, $query_desc,
 357         $limit, $limit_cgi, $limit_desc,   undef,      undef
 358     );
 359 }
 360
 361 =head2 build_authorities_query
 362
 363     my $query = $builder->build_authorities_query(\%search);
 364
 365 This takes a nice description of an authority search and turns it into a black-box
 366 query that can then be passed to the appropriate searcher.
 367
 368 The search description is a hashref that looks something like:
 369
 370     {
 371         searches => [
 372             {
 373                 where    => 'Heading',    # search the main entry
 374                 operator => 'exact',        # require an exact match
 375                 value    => 'frogs',        # the search string
 376             },
 377             {
 378                 where    => '',             # search all entries
 379                 operator => '',             # default keyword, right truncation
 380                 value    => 'pond',
 381             },
 382         ],
 383         sort => {
 384             field => 'Heading',
 385             order => 'desc',
 386         },
 387         authtypecode => 'TOPIC_TERM',
 388     }
 389
 390 =cut
 391
 392 sub build_authorities_query {
 393     my ( $self, $search ) = @_;
 394
 395     # Start by making the query parts
 396     my @query_parts;
 397
 398     foreach my $s ( @{ $search->{searches} } ) {
 399         my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
 400         if ( defined $op && ($op eq 'is' || $op eq '=' || $op eq 'exact') ) {
 401             if ($wh) {
 402                 # Match the whole field, case insensitive, UTF normalized.
 403                 push @query_parts, { term => { "$wh.ci_raw" => $val } };
 404             }
 405             else {
 406                 # Match the whole field for all searchable fields, case insensitive,
 407                 # UTF normalized.
 408                 # Given that field data is "The quick brown fox"
 409                 # "The quick brown fox" and "the quick brown fox" will match
 410                 # but not "quick brown fox".
 411                 push @query_parts, {
 412                     multi_match => {
 413                         query => $val,
 414                         fields => $self->_search_fields({ subfield => 'ci_raw' }),
 415                     }
 416                 };
 417             }
 418         }
 419         elsif ( defined $op && $op eq 'start') {
 420             # Match the prefix within a field for all searchable fields.
 421             # Given that field data is "The quick brown fox"
 422             # "The quick bro" will match, but not "quick bro"
 423
 424             # Does not seems to be a multi prefix query
 425             # so we need to create one
 426             if ($wh) {
 427                 # Match prefix of the field.
 428                 push @query_parts, { prefix => {"$wh.ci_raw" => $val} };
 429             }
 430             else {
 431                 my @prefix_queries;
 432                 foreach my $field (@{$self->_search_fields()}) {
 433                     push @prefix_queries, {
 434                         prefix => { "$field.ci_raw" => $val }
 435                     };
 436                 }
 437                 push @query_parts, {
 438                     'bool' => {
 439                         'should' => \@prefix_queries,
 440                         'minimum_should_match' => 1
 441                     }
 442                 };
 443             }
 444         }
 445         else {
 446             # Query all searchable fields.
 447             # Given that field data is "The quick brown fox"
 448             # a search containing any of the words will match, regardless
 449             # of order.
 450
 451             my @tokens = $self->_split_query( $val );
 452             foreach my $token ( @tokens ) {
 453                 $token = $self->_truncate_terms(
 454                     $self->clean_search_term( $token )
 455                 );
 456             }
 457             my $query = $self->_join_queries( @tokens );
 458             my $query_string = {
 459                 query            => $query,
 460                 lenient          => JSON::true,
 461                 analyze_wildcard => JSON::true,
 462             };
 463             if ($wh) {
 464                 $query_string->{default_field} = $wh;
 465             }
 466             else {
 467                 $query_string->{fields} = $self->_search_fields();
 468             }
 469             push @query_parts, { query_string => $query_string };
 470         }
 471     }
 472
 473     # Merge the query parts appropriately
 474     # 'should' behaves like 'or'
 475     # 'must' behaves like 'and'
 476     # Zebra behaviour seem to match must so using that here
 477     my $elastic_query = {};
 478     $elastic_query->{bool}->{must} = \@query_parts;
 479
 480     # Filter by authtypecode if set
 481     if ($search->{authtypecode}) {
 482         $elastic_query->{bool}->{filter} = {
 483             term => {
 484                 "authtype.raw" => $search->{authtypecode}
 485             }
 486         };
 487     }
 488
 489     my $query = {
 490         query => $elastic_query
 491     };
 492
 493     # Add the sort stuff
 494     $query->{sort} = [ $search->{sort} ] if exists $search->{sort};
 495
 496     return $query;
 497 }
 498
 499 =head2 build_authorities_query_compat
 500
 501     my ($query) =
 502       $builder->build_authorities_query_compat( \@marclist, \@and_or,
 503         \@excluding, \@operator, \@value, $authtypecode, $orderby );
 504
 505 This builds a query for searching for authorities, in the style of
 506 L<C4::AuthoritiesMarc::SearchAuthorities>.
 507
 508 Arguments:
 509
 510 =over 4
 511
 512 =item marclist
 513
 514 An arrayref containing where the particular term should be searched for.
 515 Options are: mainmainentry, mainentry, match, match-heading, see-from, and
 516 thesaurus. If left blank, any field is used.
 517
 518 =item and_or
 519
 520 Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.
 521
 522 =item excluding
 523
 524 Also ignored.
 525
 526 =item operator
 527
What form of search to do. Options are: is (phrase, no truncation, whole field
must match), = (number exact match), exact (phrase, no truncation, whole field
must match), start (the field must begin with the value). If left blank, then
word list, right truncated, anywhere is used.
 531
 532 =item value
 533
 534 The actual user-provided string value to search for.
 535
 536 =item authtypecode
 537
 538 The authority type code to search within. If blank, then all will be searched.
 539
 540 =item orderby
 541
 542 The order to sort the results by. Options are Relevance, HeadingAsc,
 543 HeadingDsc, AuthidAsc, AuthidDsc.
 544
 545 =back
 546
 547 marclist, operator, and value must be the same length, and the values at
 548 index /i/ all relate to each other.
 549
 550 This returns a query, which is a black box object that can be passed to the
 551 appropriate search object.
 552
 553 =cut
 554
 555 our $koha_to_index_name = {
 556     mainmainentry   => 'heading-main',
 557     mainentry       => 'heading',
 558     match           => 'match',
 559     'match-heading' => 'match-heading',
 560     'see-from'      => 'match-heading-see-from',
 561     thesaurus       => 'subject-heading-thesaurus',
 562     'thesaurus-conventions' => 'subject-heading-thesaurus-conventions',
 563     any             => '',
 564     all             => ''
 565 };
 566
 567 # Note that sears and aat map to 008/11 values here
 568 # but don't appear in C4/Headin/MARC21 thesaurus
 569 # because they don't have values in controlled field indicators
 570 # https://www.loc.gov/marc/authority/ad008.html
 571 our $thesaurus_to_value = {
 572    lcsh  => 'a',
 573    lcac  => 'b',
 574    mesh  => 'c',
 575    nal   => 'd',
 576    notspecified => 'n',
 577    cash  => 'k',
 578    rvm   => 'v',
 579    aat   => 'r',
 580    sears => 's'
 581 };
 582
 583 sub build_authorities_query_compat {
 584     my ( $self, $marclist, $and_or, $excluding, $operator, $value,
 585         $authtypecode, $orderby )
 586       = @_;
 587
 588     # This turns the old-style many-options argument form into a more
 589     # extensible hash form that is understood by L<build_authorities_query>.
 590     my @searches;
 591     my $mappings = $self->get_elasticsearch_mappings();
 592
 593     # Convert to lower case
 594     $marclist = [map(lc, @{$marclist})];
 595     $orderby  = lc $orderby;
 596
 597     my @indexes;
 598     # Make sure everything exists
 599     foreach my $m (@$marclist) {
 600
 601         $m = exists $koha_to_index_name->{$m} ? $koha_to_index_name->{$m} : $m;
 602         push @indexes, $m;
 603         warn "Unknown search field $m in marclist" unless (defined $mappings->{properties}->{$m} || $m eq '' || $m eq 'match-heading');
 604     }
 605     for ( my $i = 0 ; $i < @$value ; $i++ ) {
 606         next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches
 607         $value->[$i] = $thesaurus_to_value->{ $value->[$i] }
 608             if( defined $thesaurus_to_value->{ $value->[$i] } && $indexes[$i] eq 'subject-heading-thesaurus' );
 609         push @searches,
 610           {
 611             where    => $indexes[$i],
 612             operator => $operator->[$i],
 613             value    => $value->[$i],
 614           };
 615     }
 616
 617     my %sort;
 618     my $sort_field =
 619         ( $orderby =~ /^heading/ ) ? 'heading__sort'
 620       : ( $orderby =~ /^auth/ )    ? 'local-number__sort'
 621       :                              undef;
 622     if ($sort_field) {
 623         my $sort_order = ( $orderby =~ /asc$/ ) ? 'asc' : 'desc';
 624         %sort = ( $sort_field => $sort_order, );
 625     }
 626     my %search = (
 627         searches     => \@searches,
 628         authtypecode => $authtypecode,
 629     );
 630     $search{sort} = \%sort if %sort;
 631     my $query = $self->build_authorities_query( \%search );
 632     return $query;
 633 }
 634
 635 =head2 _build_scan_query
 636
 637     my ($query, $query_str) = $builder->_build_scan_query(\@operands, \@indexes)
 638
 639 This will build an aggregation scan query that can be issued to elasticsearch from
 640 the provided string input.
 641
 642 =cut
 643
 644 our %scan_field_convert = (
 645     'ti' => 'title',
 646     'au' => 'author',
 647     'su' => 'subject',
 648     'se' => 'title-series',
 649     'pb' => 'publisher',
 650 );
 651
 652 sub _build_scan_query {
 653     my ( $self, $operands, $indexes ) = @_;
 654
 655     my $term = scalar( @$operands ) == 0 ? '' : $operands->[0];
 656     my $index = scalar( @$indexes ) == 0 ? 'subject' : $indexes->[0];
 657
 658     my ( $f, $d ) = split( /,/, $index);
 659     $index = $scan_field_convert{$f} || $f;
 660
 661     my $res;
 662     $res->{query} = {
 663         query_string => {
 664             query => '*'
 665         }
 666     };
 667     $res->{aggregations} = {
 668         $index => {
 669             terms => {
 670                 field => $index . '__facet',
 671                 order => { '_key' => 'asc' },
 672                 include => $self->_create_regex_filter($self->clean_search_term($term)) . '.*'
 673             }
 674         }
 675     };
 676     return ($res, $term);
 677 }
 678
 679 =head2 _create_regex_filter
 680
 681     my $filter = $builder->_create_regex_filter('term')
 682
 683 This will create a regex filter that can be used with an aggregation query.
 684
 685 =cut
 686
 687 sub _create_regex_filter {
 688     my ($self, $term) = @_;
 689
 690     my $result = '';
 691     foreach my $c (split(//, quotemeta($term))) {
 692         my $lc = lc($c);
 693         my $uc = uc($c);
 694         $result .= $lc ne $uc ? '[' . $lc . $uc . ']' : $c;
 695     }
 696     return $result;
 697 }
 698
 699 =head2 _convert_sort_fields
 700
    my @sort_params = $self->_convert_sort_fields(@sort_by);
 702
 703 Converts the zebra-style sort index information into elasticsearch-style.
 704
 705 C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
 706 something that can be sent to L<build_query>.
 707
 708 =cut
 709
 710 sub _convert_sort_fields {
 711     my ( $self, @sort_by ) = @_;
 712
 713     # Turn the sorting into something we care about.
 714     my %sort_field_convert = (
 715         acqdate     => 'date-of-acquisition',
 716         author      => 'author',
 717         call_number => 'cn-sort',
 718         popularity  => 'issues',
 719         relevance   => undef,       # default
 720         title       => 'title',
 721         pubdate     => 'date-of-publication',
 722         biblionumber => 'local-number',
 723     );
 724     my %sort_order_convert =
 725       ( qw( desc desc ), qw( dsc desc ), qw( asc asc ), qw( az asc ), qw( za desc ) );
 726
 727     # Convert the fields and orders, drop anything we don't know about.
 728     grep { $_->{field} } map {
 729         my ( $f, $d ) = /(.+)_(.+)/;
 730         {
 731             field     => $sort_field_convert{$f},
 732             direction => $sort_order_convert{$d}
 733         }
 734     } @sort_by;
 735 }
 736
 737 sub _convert_index_fields {
 738     my ( $self, @indexes ) = @_;
 739
 740     my %index_type_convert =
 741       ( __default => undef, phr => 'phrase', rtrn => 'right-truncate', 'st-year' => 'st-year' );
 742
 743     @indexes = grep { $_ ne q{} } @indexes; # Remove any blank indexes, i.e. keyword
 744
 745     # Convert according to our table, drop anything that doesn't convert.
 746     # If a field starts with mc- we save it as it's used (and removed) later
 747     # when joining things, to indicate we make it an 'OR' join.
 748     # (Sorry, this got a bit ugly after special cases were found.)
 749     map {
 750         # Lower case all field names
 751         my ( $f, $t ) = map(lc, split /,/);
 752         my $mc = '';
 753         if ($f =~ /^mc-/) {
 754             $mc = 'mc-';
 755             $f =~ s/^mc-//;
 756         }
 757         my $r = {
 758             field => exists $index_field_convert{$f} ? $index_field_convert{$f} : $f,
 759             type  => $index_type_convert{ $t // '__default' }
 760         };
 761         $r->{field} = ($mc . $r->{field}) if $mc && $r->{field};
 762         $r->{field} || $r->{type} ? $r : undef;
 763     } @indexes;
 764 }
 765
 766 =head2 _convert_index_strings
 767
 768     my @searches = $self->_convert_index_strings(@searches);
 769
 770 Similar to L<_convert_index_fields>, this takes strings of the form
 771 B<field:search term> and rewrites the field from zebra-style to
 772 elasticsearch-style. Anything it doesn't understand is returned verbatim.
 773
 774 =cut
 775
 776 sub _convert_index_strings {
 777     my ( $self, @searches ) = @_;
 778     my @res;
 779     foreach my $s (@searches) {
 780         next if $s eq '';
 781         my ( $field, $term ) = $s =~ /^\s*([\w,-]*?):(.*)/;
 782         unless ( defined($field) && defined($term) ) {
 783             push @res, $s;
 784             next;
 785         }
 786         my ($conv) = $self->_convert_index_fields($field);
 787         unless ( defined($conv) ) {
 788             push @res, $s;
 789             next;
 790         }
 791         push @res, ($conv->{field} ? $conv->{field} . ':' : '')
 792             . $self->_modify_string_by_type( %$conv, operand => $term );
 793     }
 794     return @res;
 795 }
 796
 797 =head2 _convert_index_strings_freeform
 798
 799     my $search = $self->_convert_index_strings_freeform($search);
 800
 801 This is similar to L<_convert_index_strings>, however it'll search out the
 802 things to change within the string. So it can handle strings such as
 803 C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
 804
 805 If there is something of the form "su,complete-subfield" or something, the
 806 second part is stripped off as we can't yet handle that. Making it work
 807 will have to wait for a real query parser.
 808
 809 =cut
 810
 811 sub _convert_index_strings_freeform {
 812     my ( $self, $search ) = @_;
 813     # @TODO: Currently will alter also fields contained within quotes:
 814     # `searching for "stuff cn:123"` for example will become
 815     # `searching for "stuff local-number:123"
 816     #
 817     # Fixing this is tricky, one possibility:
 818     # https://stackoverflow.com/questions/19193876/perl-regex-to-match-a-string-that-is-not-enclosed-in-quotes
 819     # Still not perfect, and will not handle escaped quotes within quotes and assumes balanced quotes.
 820     #
 821     # Another, not so elegant, solution could be to replace all quoted content with placeholders, and put
 822     # them back when processing is done.
 823
 824     # Lower case field names
 825     $search =~ s/($field_name_pattern)(?:,[\w-]*)?($multi_field_pattern):/\L$1\E$2:/og;
 826     # Resolve possible field aliases
 827     $search =~ s/($field_name_pattern)($multi_field_pattern):/(exists $index_field_convert{$1} ? $index_field_convert{$1} : $1).($1 eq 'kw' ? "$2" : "$2:")/oge;
 828     return $search;
 829 }
 830
 831 =head2 _modify_string_by_type
 832
 833     my $str = $self->_modify_string_by_type(%index_field);
 834
 835 If you have a search term (operand) and a type (phrase, right-truncated), this
 836 will convert the string to have the function in lucene search terms, e.g.
 837 wrapping quotes around it.
 838
 839 =cut
 840
 841 sub _modify_string_by_type {
 842     my ( $self, %idx ) = @_;
 843
 844     my $type = $idx{type} || '';
 845     my $str = $idx{operand};
 846     return $str unless $str;    # Empty or undef, we can't use it.
 847
 848     $str .= '*' if $type eq 'right-truncate';
 849     $str = '"' . $str . '"' if $type eq 'phrase' && $str !~ /^".*"$/;
 850     if ($type eq 'st-year') {
 851         if ($str =~ /^(.*)-(.*)$/) {
 852             my $from = $1 || '*';
 853             my $until = $2 || '*';
 854             $str = "[$from TO $until]";
 855         }
 856     }
 857     return $str;
 858 }
 859
 860 =head2 _join_queries
 861
 862     my $query_str = $self->_join_queries(@query_parts);
 863
 864 This takes a list of query parts, that might be search terms on their own, or
 865 booleaned together, or specifying fields, or whatever, wraps them in
 866 parentheses, and ANDs them all together. Suitable for feeding to the ES
 867 query string query.
 868
 869 Note: doesn't AND them together if they specify an index that starts with "mc"
 870 as that was a special case in the original code for dealing with multiple
 871 choice options (you can't search for something that has an itype of A and
 872 and itype of B otherwise.)
 873
 874 =cut
 875
 876 sub _join_queries {
 877     my ( $self, @parts ) = @_;
 878
 879     my @norm_parts = grep { defined($_) && $_ ne '' && $_ !~ /^mc-/ } @parts;
 880     my @mc_parts =
 881       map { s/^mc-//r } grep { defined($_) && $_ ne '' && $_ =~ /^mc-/ } @parts;
 882     return () unless @norm_parts + @mc_parts;
 883     return ( @norm_parts, @mc_parts )[0] if @norm_parts + @mc_parts == 1;
 884
 885     # Group limits by field, so they can be OR'ed together
 886     my %mc_limits;
 887     foreach my $mc_part (@mc_parts) {
 888         my ($field, $value) = split /:/, $mc_part, 2;
 889         $mc_limits{$field} //= [];
 890         push @{ $mc_limits{$field} }, $value;
 891     }
 892
 893     @mc_parts = map {
 894         sprintf('%s:(%s)', $_, join (' OR ', @{ $mc_limits{$_} }));
 895     } sort keys %mc_limits;
 896
 897     @norm_parts = map { "($_)" } @norm_parts;
 898
 899     return join( ' AND ', @norm_parts, @mc_parts);
 900 }
 901
 902 =head2 _make_phrases
 903
 904     my @phrased_queries = $self->_make_phrases(@query_parts);
 905
 906 This takes the supplied queries and forces them to be phrases by wrapping
 907 quotes around them. It understands field prefixes, e.g. 'subject:' and puts
 908 the quotes outside of them if they're there.
 909
 910 =cut
 911
 912 sub _make_phrases {
 913     my ( $self, @parts ) = @_;
 914     map { s/^\s*(\w*?:)(.*)$/$1"$2"/r } @parts;
 915 }
 916
 917 =head2 _create_query_string
 918
 919     my @query_strings = $self->_create_query_string(@queries);
 920
 921 Given a list of hashrefs, it will turn them into a lucene-style query string.
 922 The hash should contain field, type (both for the indexes), operator, and
 923 operand.
 924
 925 =cut
 926
 927 sub _create_query_string {
 928     my ( $self, @queries ) = @_;
 929
 930     map {
 931         my $otor  = $_->{operator} ? $_->{operator} . ' ' : '';
 932         my $field = $_->{field}    ? $_->{field} . ':'    : '';
 933
 934         my $oand = $self->_modify_string_by_type(%$_);
 935         $oand = "($oand)" if $field && scalar(split(/\s+/, $oand)) > 1 && (!defined $_->{type} || $_->{type} ne 'st-year');
 936         "$otor($field$oand)";
 937     } @queries;
 938 }
 939
 940 =head2 clean_search_term
 941
 942     my $term = $self->clean_search_term($term);
 943
 944 This cleans a search term by removing any funny characters that may upset
 945 ES and give us an error. It also calls L<_convert_index_strings_freeform>
 946 to ensure those parts are correct.
 947
 948 =cut
 949
 950 sub clean_search_term {
 951     my ( $self, $term ) = @_;
 952
 953     # Lookahead for checking if we are inside quotes
 954     my $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
 955
 956     # Some hardcoded searches (like with authorities) produce things like
 957     # 'an=123', when it ought to be 'an:123' for our purposes.
 958     $term =~ s/=/:/g;
 959
 960     $term = $self->_convert_index_strings_freeform($term);
 961
 962     # Remove unbalanced quotes
 963     my $unquoted = $term;
 964     my $count = ($unquoted =~ tr/"/ /);
 965     if ($count % 2 == 1) {
 966         $term = $unquoted;
 967     }
 968     $term = $self->_query_regex_escape_process($term);
 969
 970     # because of _truncate_terms and if QueryAutoTruncate enabled
 971     # we will have any special operators ruined by _truncate_terms:
 972     # for ex. search for "test [6 TO 7]" will be converted to "test* [6* TO* 7]"
 973     # so no reason to keep ranges in QueryAutoTruncate==true case:
 974     my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
 975     unless($truncate) {
 976         # replace all ranges with any square/curly brackets combinations to temporary substitutions (ex: "{a TO b]"" -> "~~LC~~a TO b~~RS~~")
 977         # (where L is for left and C is for Curly and so on)
 978         $term =~ s/
 979             (?<!\\)
 980             (?<backslashes>(?:[\\]{2})*)
 981             (?<leftbracket>\{|\[)
 982             (?<ranges>
 983                 [^\s\[\]\{\}]+\ TO\ [^\s\[\]\{\}]+
 984                 (?<!\\)
 985                 (?:[\\]{2})*
 986             )
 987             (?<rightbracket>\}|\])
 988         /$+{backslashes}.'~~L'.($+{leftbracket} eq '[' ? 'S':'C').'~~'.$+{ranges}.'~~R'.($+{rightbracket} eq ']' ? 'S':'C').'~~'/gex;
 989     }
 990     # save all regex contents away before escaping brackets:
 991     # (same trick as with brackets above, just RE for 'RegularExpression')
 992     my @saved_regexes;
 993     my $rgx_i = 0;
 994     while(
 995             $term =~ s@(
 996                 (?<!\\)(?:[\\]{2})*/
 997                 (?:[^/]+|(?<=\\)(?:[\\]{2})*/)+
 998                 (?<!\\)(?:[\\]{2})*/
 999             )$lookahead@~~RE$rgx_i~~@x
1000     ) {
1001         @saved_regexes[$rgx_i++] = $1;
1002     }
1003
1004     # remove leading and trailing colons mixed with optional slashes and spaces
1005     $term =~ s/^([\s\\]*:\s*)+//;
1006     $term =~ s/([\s\\]*:\s*)+$//;
1007     # remove unquoted colons that have whitespace on either side of them
1008     $term =~ s/([\s\\]*:\s*)+(\s+)$lookahead/$2/g;
1009     $term =~ s/(\s+)([\s\\]*:\s*)+$lookahead/$1/g;
1010     # replace with spaces all repeated colons no matter how they surrounded with spaces and slashes
1011     $term =~ s/([\s\\]*:\s*){2,}$lookahead/ /g;
1012     # screen all followups for colons after first colon,
1013     # and correctly ignore unevenly backslashed:
1014     $term =~ s/((?<!\\)(?:[\\]{2})*:[^:\s]+(?<!\\)(?:[\\]{2})*)(?=:)/$1\\/g;
1015
1016     # screen all exclamation signs that either are the last symbol or have white space after them
1017     # or are followed by close parentheses
1018     $term =~ s/(?:[\s\\]*!\s*)+(\s|$|\))/$1/g;
1019
1020     # screen all brackets with backslash
1021     $term =~ s/(?<!\\)(?:[\\]{2})*([\{\}\[\]])$lookahead/\\$1/g;
1022
1023     # restore all regex contents after escaping brackets:
1024     for (my $i = 0; $i < @saved_regexes; $i++) {
1025         $term =~ s/~~RE$i~~/$saved_regexes[$i]/;
1026     }
1027     unless($truncate) {
1028         # restore temporary weird substitutions back to normal brackets
1029         $term =~ s/~~L(C|S)~~([^\s\[\]\{\}]+ TO [^\s\[\]\{\}]+)~~R(C|S)~~/($1 eq 'S' ? '[':'{').$2.($3 eq 'S' ? ']':'}')/ge;
1030     }
1031     return $term;
1032 }
1033
=head2 _query_regex_escape_process

    my $query = $self->_query_regex_escape_process($query);

Processes query in accordance with current "QueryRegexEscapeOptions" system
preference setting:

=over 4

=item C<dont_escape>

The query is returned untouched.

=item C<escape>

Unescaped slashes (/) get a backslash put in front of them.

=item C<unescape_escaped>

Escaped slashes (\/) are unescaped and unescaped slashes (/) are escaped.

=back

Slashes within quotes are preserved in both cases. Any other setting leaves
the query unchanged as well.

=cut

sub _query_regex_escape_process {
    my ( $self, $query ) = @_;

    my $mode = C4::Context->preference("QueryRegexEscapeOptions");
    return $query if $mode eq 'dont_escape';

    # @TODO: both substitutions assume that quotes are always balanced and
    # will not handle escaped quotes properly. They should perhaps be
    # replaced with a more general parser solution, so that this function is
    # only ever provided with unquoted query parts.
    if ( $mode eq 'escape' ) {

        # Put a backslash in front of every slash that is not already
        # escaped and not inside quotes
        $query =~ s@(?:(?<!\\)((?:[\\]{2})*)(?=/))(?![^"]*"(?:[^"]*"[^"]*")*[^"]*$)@\\$1@g;
    }
    elsif ( $mode eq 'unescape_escaped' ) {

        # Drop the escaping backslash of escaped slashes and add one to
        # unescaped slashes, again leaving slashes inside quotes alone
        $query =~ s@(?:(?<!\\)(?:((?:[\\]{2})*[\\])|((?:[\\]{2})*))(?=/))(?![^"]*"(?:[^"]*"[^"]*")*[^"]*$)@($1 ? substr($1, 0, -1) : ($2 . "\\"))@ge;
    }

    return $query;
}
1065
=head2 _fix_limit_special_cases

    my $limits = $self->_fix_limit_special_cases($limits);

This converts any special cases that the limit specifications have into things
that are more readily processable by the rest of the code.

The argument should be an arrayref, and it'll return an arrayref.

The following limits are recognised, checked in this order:

=over 4

=item C<yr,st-numeric,ge=X and yr,st-numeric,le=Y>

Becomes C<date-of-publication:[X TO Y]>. It is dropped if it cannot be parsed.

=item C<search_filter:ID>

The search filter is looked up and expanded; its limits are processed by this
method again and its query part is passed through L<clean_search_term>. An
unknown filter is dropped.

=item C<yr,st-numeric=X>

Becomes C<date-of-publication:> followed by X as modified by
L<_modify_string_by_type> for type C<st-year>.

=item C<multibranchlimit:ID> and C<branch:CODE>

Become an OR'ed list of quoted branch codes on C<homebranch>,
C<holdingbranch> or both, depending on the "SearchLimitLibrary" system
preference. An unknown library group, or one without libraries, adds nothing.

=item C<available>

Becomes C<available:true>.

=item C<kw...:TERM>

Becomes the bare term, quoted and parenthesised if the field ends in C<,phr>.

=item anything else of the form C<FIELD:TERM>

Becomes C<FIELD:("TERM")>, with a trailing C<,phr> removed from the field.
Limits without a colon are passed through unchanged.

=back

=cut

sub _fix_limit_special_cases {
    my ( $self, $limits ) = @_;

    my @new_lim;
    foreach my $limit ( @{$limits} ) {

        # This is set up by opac-search.pl
        if ( $limit =~ /^yr,st-numeric,ge[=:]/ ) {
            my ( $start, $end ) =
              ( $limit =~ /^yr,st-numeric,ge[=:](.*) and yr,st-numeric,le[=:](.*)$/ );
            next unless defined($start) && defined($end);
            push @new_lim, "date-of-publication:[$start TO $end]";
        }
        elsif ( $limit =~ /^search_filter:/ ) {
            # Here we are going to get the query as a string, clean it, and take care of the part of the limit
            # Calling build_query_compat here is avoided because we generate more complex query structures
            my ($filter_id) = ( $limit =~ /^search_filter:(.*)$/ );
            my $search_filter = Koha::SearchFilters->find($filter_id);
            next unless $search_filter;
            my ( $expanded_lim, $query_lim ) = $search_filter->expand_filter;

            # In the case of nested filters we need to expand them all
            push @new_lim, @{ $self->_fix_limit_special_cases($expanded_lim) };

            # We need to clean the query part as we have built a string from the original search
            push @new_lim, $self->clean_search_term($query_lim);
        }
        elsif ( $limit =~ /^yr,st-numeric[=:]/ ) {
            my ($date) = ( $limit =~ /^yr,st-numeric[=:](.*)$/ );
            next unless defined($date);
            $date = $self->_modify_string_by_type( type => 'st-year', operand => $date );
            push @new_lim, "date-of-publication:$date";
        }
        elsif ( $limit =~ /multibranchlimit|^branch/ ) {
            my $branchfield = C4::Context->preference('SearchLimitLibrary');
            my @branchcodes;
            if ( $limit =~ /multibranchlimit/ ) {
                my ($group_id) = ( $limit =~ /^multibranchlimit:(.*)$/ );
                my $search_group = Koha::Library::Groups->find($group_id);

                # A group that cannot be found contributes no branch codes,
                # instead of dying on a method call on undef
                if ($search_group) {
                    @branchcodes =
                      sort { $a cmp $b }
                      map  { $_->branchcode } $search_group->all_libraries;
                }
            }
            else {
                @branchcodes = ( $limit =~ /^branch:(.*)$/ );
            }

            if (@branchcodes) {
                my @fields;
                if ( $branchfield eq "homebranch" ) {
                    @fields = ('homebranch');
                }
                elsif ( $branchfield eq "holdingbranch" ) {
                    @fields = ('holdingbranch');
                }
                else {
                    @fields = ( 'homebranch', 'holdingbranch' );
                }

                # We quote the branchcodes here to prevent issues when codes are reserved words in ES, e.g. OR, AND, NOT, etc.
                my @clauses;
                for my $field (@fields) {
                    push @clauses, map { $field . ': "' . $_ . '"' } @branchcodes;
                }
                push @new_lim, sprintf "(%s)", join " OR ", @clauses;
            }
        }
        elsif ( $limit =~ /^available$/ ) {
            push @new_lim, 'available:true';
        }
        elsif ( $limit =~ /^\s*(kw\b[\w,-]*?):(.*)/ ) {
            my ( $field, $term ) = ( $1, $2 );
            if ( $field =~ /,phr$/ ) {
                push @new_lim, "(\"$term\")";
            }
            else {
                push @new_lim, $term;
            }
        }
        else {
            my ( $field, $term ) = $limit =~ /^\s*([\w,-]*?):(.*)/;
            if ( defined($field) && defined($term) ) {

                # We are quoting all the limits as phrase, this prevents from quoting again later.
                # Only done once we know there is a field, so limits
                # without a colon do not trigger an uninitialized warning.
                $field =~ s/,phr$//;
                push @new_lim, "$field:(\"$term\")";
            }
            else {
                push @new_lim, $limit;
            }
        }
    }
    return \@new_lim;
}
1162
=head2 _sort_field

    my $field = $self->_sort_field($field);

Given a field name, this works out what the actual name of the field to sort
on should be. A '__sort' suffix is added unless C<sort_fields> explicitly
marks the field as not sortable, i.e. has a defined but false value for it.
For those fields '.raw' is appended if the Elasticsearch mappings declare the
field as type C<text>, to avoid sorting on a tokenized value. All other field
names are returned unchanged.

=cut

sub _sort_field {
    my ( $self, $f ) = @_;

    my $mappings = $self->get_elasticsearch_mappings();
    my $sortable = $self->sort_fields()->{$f};

    # Fields that sort_fields knows nothing about are treated as sortable
    return $f . '__sort' if !defined $sortable || $sortable;

    # Look the type up step by step: a chained lookup would create empty
    # entries for unknown fields inside the mappings structure.
    my $properties = $mappings->{properties} // {};
    my $definition = $properties->{$f}       // {};
    my $type       = $definition->{type};

    # We need to add '.raw' to text fields without a sort field,
    # otherwise it'll sort based on the tokenised form.
    return $f . '.raw' if defined $type && $type eq 'text';

    return $f;
}
1188
=head2 _truncate_terms

    my $query = $self->_truncate_terms($query);

Given a string query this function appends the '*' wildcard to all terms,
except the boolean operators C<and>, C<or> and C<not> (in any case) and terms
that end in a non-word character, such as double quoted strings. The query is
split with L<_split_query> and the resulting terms are joined with single
spaces.

=cut

sub _truncate_terms {
    my ( $self, $query ) = @_;

    my %is_operator = map { $_ => 1 } qw/and or not/;

    my @terms;
    for my $token ( $self->_split_query($query) ) {

        # Skip empty tokens
        next if $token =~ /^\s*$/;

        # Only tokens that end in a word character and are not a keyword
        # get the wildcard
        my $keep_as_is = $token =~ /\W$/ || $is_operator{ lc $token };
        push @terms, $keep_as_is ? $token : "$token*";
    }

    return join ' ', @terms;
}
1214
=head2 _split_query

    my @token = $self->_split_query($query_str);

Given a string query this function splits it to tokens taking into account
any field prefixes and quoted strings: a double quoted string, together with
a field prefix directly in front of it, is kept as a single token, and
everything else is split on whitespace.

=cut

# Separator used by _split_query: either a double quoted string with an
# optional field prefix, or a run of whitespace.
my $tokenize_split_re = qr/((?:${field_name_pattern}${multi_field_pattern}:)?"[^"]+"|\s+)/;

sub _split_query {
    my ( $self, $query ) = @_;

    # The whole pattern is a capturing group, so split also returns what the
    # separators matched (the quoted strings and the whitespace runs), not
    # just the text in between. Whitespace-only and empty elements are
    # filtered out again here.
    return grep { /\S/ } split $tokenize_split_re, $query;
}
1238
=head2 _search_fields

    my $weighted_fields = $self->_search_fields({
        is_opac => 0,
        weighted_fields => 1,
        subfield => 'raw'
    });

Generate a list of searchable fields to be used for Elasticsearch queries
applied to multiple fields.

Returns an arrayref of field names for either OPAC or staff interface, with
possible weights and subfield appended to each field name depending on the
options provided. A weight is appended as "C<field_name>^C<weight>"; fields
without a weight are returned without the "^" part.

The field list is read from the database once and then kept in the cache,
separately per interface and index.

=over 4

=item C<$params>

Hashref with options. The parameter C<is_opac> indicates whether the searchable
fields for OPAC or staff interface should be retrieved. If C<weighted_fields> is set
fields weights will be applied on returned fields. C<subfield> can be used to
provide a subfield that will be appended to fields as "C<field_name>.C<subfield>".

=back

=cut

sub _search_fields {
    my ( $self, $params ) = @_;
    $params //= {
        is_opac         => 0,
        weighted_fields => 0,
        whole_record    => 0,

        # This is a hack for authorities build_authorities_query
        # can hopefully be removed in the future
        subfield => undef,
    };
    my $cache     = Koha::Caches->get_instance();
    my $cache_key = 'elasticsearch_search_fields' . ( $params->{is_opac} ? '_opac' : '_staff_client' ) . "_" . $self->index;
    my $search_fields = $cache->get_from_cache( $cache_key, { unsafe => 1 } );
    if ( !$search_fields ) {
        # The reason we don't use Koha::SearchFields->search here is we don't
        # want or need resultset wrapped as Koha::SearchField object.
        # It does not make any sense in this context and would cause
        # unnecessary overhead since we are only querying for data
        # Also would not work, or produce strange results, with the "columns"
        # option.
        my $schema = Koha::Database->schema;
        my $result = $schema->resultset('SearchField')->search(
            {
                $params->{is_opac} ? (
                    'opac' => 1,
                ) : (
                    'staff_client' => 1
                ),
                'type' => { '!=' => 'boolean' },
                'search_marc_map.index_name' => $self->index,
                'search_marc_map.marc_type' => C4::Context->preference('marcflavour'),
                'search_marc_to_fields.search' => 1,
            },
            {
                columns => [qw/name weight/],
                collapse => 1,
                join => {search_marc_to_fields => 'search_marc_map'},
            }
        );

        # Each entry is [name] or [name, weight]; the weight is left out
        # entirely when the field has none
        my @search_fields;
        while ( my $search_field = $result->next ) {
            push @search_fields, [
                lc $search_field->name,
                $search_field->weight ? $search_field->weight : ()
            ];
        }
        $search_fields = \@search_fields;
        $cache->set_in_cache( $cache_key, $search_fields );
    }
    if ( $params->{subfield} ) {
        my $subfield = $params->{subfield};
        $search_fields = [
            map {
                # Copy values to avoid mutating cached
                # data (since unsafe is used)
                my ( $field, $weight ) = @{$_};

                # Keep weightless fields weightless, otherwise the join
                # below would produce "field.subfield^"
                [ "${field}.${subfield}", defined $weight ? $weight : () ];
            } @{$search_fields}
        ];
    }
    if ( $params->{weighted_fields} ) {
        return [ map { join( '^', @{$_} ) } @{$search_fields} ];
    }
    else {
        # Exclude weight from field
        return [ map { $_->[0] } @{$search_fields} ];
    }
}
1334
1335 1;