Koha/SearchEngine/Elasticsearch/QueryBuilder.pm

   1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
   2
   3 # This file is part of Koha.
   4 #
   5 # Copyright 2014 Catalyst IT Ltd.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it
   8 # under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 3 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  19
  20 =head1 NAME
  21
  22 Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
  23 query objects from user-supplied queries
  24
  25 =head1 DESCRIPTION
  26
  27 This provides the functions that take a user-supplied search query, and
  28 provides something that can be given to elasticsearch to get answers.
  29
  30 =head1 SYNOPSIS
  31
  32     use Koha::SearchEngine::Elasticsearch::QueryBuilder;
    my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
  34     my $simple_query = $builder->build_query("hello");
  35     # This is currently undocumented because the original code is undocumented
  36     my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
  37
  38 =head1 METHODS
  39
  40 =cut
  41
  42 use base qw(Koha::SearchEngine::Elasticsearch);
  43 use JSON;
  44 use List::MoreUtils qw( each_array );
  45 use Modern::Perl;
  46 use URI::Escape qw( uri_escape_utf8 );
  47
  48 use C4::Context;
  49 use Koha::Exceptions;
  50 use Koha::Caches;
  51
  52 our %index_field_convert = (
  53     'kw' => '',
  54     'ab' => 'abstract',
  55     'au' => 'author',
  56     'lcn' => 'local-classification',
  57     'callnum' => 'local-classification',
  58     'record-type' => 'rtype',
  59     'mc-rtype' => 'rtype',
  60     'mus' => 'rtype',
  61     'lc-card' => 'lc-card-number',
  62     'sn' => 'local-number',
  63     'biblionumber' => 'local-number',
  64     'yr' => 'date-of-publication',
  65     'pubdate' => 'date-of-publication',
  66     'acqdate' => 'date-of-acquisition',
  67     'date/time-last-modified' => 'date-time-last-modified',
  68     'dtlm' => 'date-time-last-modified',
  69     'diss' => 'dissertation-information',
  70     'nb' => 'isbn',
  71     'ns' => 'issn',
  72     'music-number' => 'identifier-publisher-for-music',
  73     'number-music-publisher' => 'identifier-publisher-for-music',
  74     'music' => 'identifier-publisher-for-music',
  75     'ident' => 'identifier-standard',
  76     'cpn' => 'corporate-name',
  77     'cfn' => 'conference-name',
  78     'pn' => 'personal-name',
  79     'pb' => 'publisher',
  80     'pv' => 'provider',
  81     'nt' => 'note',
  82     'notes' => 'note',
  83     'rcn' => 'record-control-number',
  84     'cni' => 'control-number-identifier',
  85     'su' => 'subject',
  86     'su-to' => 'subject',
  87     #'su-geo' => 'subject',
  88     'su-ut' => 'subject',
  89     'ti' => 'title',
  90     'se' => 'title-series',
  91     'ut' => 'title-uniform',
  92     'an' => 'koha-auth-number',
  93     'authority-number' => 'koha-auth-number',
  94     'at' => 'authtype',
  95     'he' => 'heading',
  96     'rank' => 'relevance',
  97     'phr' => 'st-phrase',
  98     'wrdl' => 'st-word-list',
  99     'rt' => 'right-truncation',
 100     'rtrn' => 'right-truncation',
 101     'ltrn' => 'left-truncation',
 102     'rltrn' => 'left-and-right',
 103     'mc-itemtype' => 'itemtype',
 104     'mc-ccode' => 'ccode',
 105     'branch' => 'homebranch',
 106     'mc-loc' => 'location',
 107     'loc' => 'location',
 108     'stocknumber' => 'number-local-acquisition',
 109     'inv' => 'number-local-acquisition',
 110     'bc' => 'barcode',
 111     'mc-itype' => 'itype',
 112     'aub' => 'author-personal-bibliography',
 113     'auo' => 'author-in-order',
 114     'ff8-22' => 'ta',
 115     'aud' => 'ta',
 116     'audience' => 'ta',
 117     'frequency-code' => 'ff8-18',
 118     'illustration-code' => 'ff8-18-21',
 119     'regularity-code' => 'ff8-19',
 120     'type-of-serial' => 'ff8-21',
 121     'format' => 'ff8-23',
 122     'conference-code' => 'ff8-29',
 123     'festschrift-indicator' => 'ff8-30',
 124     'index-indicator' => 'ff8-31',
 125     'fiction' => 'lf',
 126     'fic' => 'lf',
 127     'literature-code' => 'lf',
 128     'biography' => 'bio',
 129     'ff8-34' => 'bio',
 130     'biography-code' => 'bio',
 131     'l-format' => 'ff7-01-02',
 132     'lex' => 'lexile-number',
 133     'hi' => 'host-item-number',
 134     'itu' => 'index-term-uncontrolled',
 135     'itg' => 'index-term-genre',
 136 );
 137 my $field_name_pattern = '[\w\-]+';
 138 my $multi_field_pattern = "(?:\\.$field_name_pattern)*";
 139
 140 =head2 get_index_field_convert
 141
    my $index_field_convert = Koha::SearchEngine::Elasticsearch::QueryBuilder->get_index_field_convert();

Returns a reference to the hash used to convert zebra-style search index
notation into elasticsearch-style.

The keys are zebra-style index names or aliases (for example C<au>) and the
values are the corresponding elasticsearch field names (for example C<author>).
 148
 149 B<TODO>: this will pull from the elasticsearch mappings table to figure out
 150 types.
 151
 152 =cut
 153
 154 sub get_index_field_convert() {
 155     return \%index_field_convert;
 156 }
 157
 158 =head2 build_query
 159
 160     my $simple_query = $builder->build_query("hello", %options)
 161
 162 This will build a query that can be issued to elasticsearch from the provided
 163 string input. This expects a lucene style search form (see
 164 L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
 165 for details.)
 166
 167 It'll make an attempt to respect the various query options.
 168
 169 Additional options can be provided with the C<%options> hash.
 170
 171 =over 4
 172
 173 =item sort
 174
 175 This should be an arrayref of hashrefs, each containing a C<field> and an
 176 C<direction> (optional, defaults to C<asc>.) The results will be sorted
 177 according to these values. Valid values for C<direction> are 'asc' and 'desc'.
 178
 179 =back
 180
 181 =cut
 182
 183 sub build_query {
 184     my ( $self, $query, %options ) = @_;
 185
 186     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 187     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 188     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 189
 190     $query = '*' unless defined $query;
 191
 192     my $res;
 193     my $fields = $self->_search_fields({
 194         is_opac => $options{is_opac},
 195         weighted_fields => $options{weighted_fields},
 196     });
 197     if ($options{whole_record}) {
 198         push @$fields, 'marc_data_array.*';
 199     }
 200     $res->{query} = {
 201         query_string => {
 202             query            => $query,
 203             fuzziness        => $fuzzy_enabled ? 'auto' : '0',
 204             default_operator => 'AND',
 205             fields           => $fields,
 206             lenient          => JSON::true,
 207             analyze_wildcard => JSON::true,
 208         }
 209     };
 210     $res->{query}->{query_string}->{type} = 'cross_fields' if C4::Context->preference('ElasticsearchCrossFields');
 211
 212     if ( $options{sort} ) {
 213         foreach my $sort ( @{ $options{sort} } ) {
 214             my ( $f, $d ) = @$sort{qw/ field direction /};
 215             die "Invalid sort direction, $d"
 216               if $d && ( $d ne 'asc' && $d ne 'desc' );
 217             $d = 'asc' unless $d;
 218
 219             $f = $self->_sort_field($f);
 220             push @{ $res->{sort} }, { $f => { order => $d } };
 221         }
 222     }
 223
 224     # See _convert_facets in Search.pm for how these get turned into
 225     # things that Koha can use.
 226     my $size = C4::Context->preference('FacetMaxCount');
 227     $res->{aggregations} = {
 228         author         => { terms => { field => "author__facet" , size => $size } },
 229         subject        => { terms => { field => "subject__facet", size => $size } },
 230         itype          => { terms => { field => "itype__facet", size => $size} },
 231         location       => { terms => { field => "location__facet", size => $size } },
 232         'su-geo'       => { terms => { field => "su-geo__facet", size => $size} },
 233         'title-series' => { terms => { field => "title-series__facet", size => $size } },
 234         ccode          => { terms => { field => "ccode__facet", size => $size } },
 235         ln             => { terms => { field => "ln__facet", size => $size } },
 236     };
 237
 238     my $display_library_facets = C4::Context->preference('DisplayLibraryFacets');
 239     if (   $display_library_facets eq 'both'
 240         or $display_library_facets eq 'home' ) {
 241         $res->{aggregations}{homebranch} = { terms => { field => "homebranch__facet", size => $size } };
 242     }
 243     if (   $display_library_facets eq 'both'
 244         or $display_library_facets eq 'holding' ) {
 245         $res->{aggregations}{holdingbranch} = { terms => { field => "holdingbranch__facet", size => $size } };
 246     }
 247     return $res;
 248 }
 249
 250 =head2 build_query_compat
 251
 252     my (
 253         $error,             $query, $simple_query, $query_cgi,
 254         $query_desc,        $limit, $limit_cgi,    $limit_desc,
 255         $stopwords_removed, $query_type
 256       )
 257       = $builder->build_query_compat( \@operators, \@operands, \@indexes,
 258         \@limits, \@sort_by, $scan, $lang, $params );
 259
 260 This handles a search using the same api as L<C4::Search::buildQuery> does.
 261
 262 A very simple query will go in with C<$operands> set to ['query'], and
 263 C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
 264 C<$query> set to something that can perform the search, C<$simple_query>
 265 set to just the search term, C<$query_cgi> set to something that can
reproduce this search, and C<$query_desc> set to a description of the query
(the search term itself for a single operand, otherwise the generated query
string).
 267
 268 =cut
 269
 270 sub build_query_compat {
 271     my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
 272         $lang, $params )
 273       = @_;
 274
 275     my $query;
 276     my $query_str = '';
 277     my $search_param_query_str = '';
 278     my $limits = ();
 279     if ( $scan ) {
 280         ($query, $query_str) = $self->_build_scan_query( $operands, $indexes );
 281         $search_param_query_str = $query_str;
 282     } else {
 283         my @sort_params  = $self->_convert_sort_fields(@$sort_by);
 284         my @index_params = $self->_convert_index_fields(@$indexes);
 285         $limits       = $self->_fix_limit_special_cases($orig_limits);
 286         if ( $params->{suppress} ) { push @$limits, "suppress:false"; }
 287         # Merge the indexes in with the search terms and the operands so that
 288         # each search thing is a handy unit.
 289         unshift @$operators, undef;    # The first one can't have an op
 290         my @search_params;
 291         my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
 292         my $ea = each_array( @$operands, @$operators, @index_params );
 293         while ( my ( $oand, $otor, $index ) = $ea->() ) {
 294             next if ( !defined($oand) || $oand eq '' );
 295             $oand = $self->clean_search_term($oand);
 296             $oand = $self->_truncate_terms($oand) if ($truncate);
 297             push @search_params, {
 298                 operand => $oand,      # the search terms
 299                 operator => defined($otor) ? uc $otor : undef,    # AND and so on
 300                 $index ? %$index : (),
 301             };
 302         }
 303
 304         # We build a string query from limits and the queries. An alternative
 305         # would be to pass them separately into build_query and let it build
 306         # them into a structured ES query itself. Maybe later, though that'd be
 307         # more robust.
 308         $search_param_query_str = join( ' ', $self->_create_query_string(@search_params) );
 309         $query_str = join( ' AND ',
 310             $search_param_query_str || (),
 311             $self->_join_queries( $self->_convert_index_strings(@$limits) ) || () );
 312
 313         # If there's no query on the left, let's remove the junk left behind
 314         $query_str =~ s/^ AND //;
 315         my %options;
 316         $options{sort} = \@sort_params;
 317         $options{is_opac} = $params->{is_opac};
 318         $options{weighted_fields} = $params->{weighted_fields};
 319         $options{whole_record} = $params->{whole_record};
 320         $query = $self->build_query( $query_str, %options );
 321     }
 322
 323     # We roughly emulate the CGI parameters of the zebra query builder
 324     my $query_cgi = '';
 325     shift @$operators; # Shift out the one we unshifted before
 326     my $ea = each_array( @$operands, @$operators, @$indexes );
 327     while ( my ( $oand, $otor, $index ) = $ea->() ) {
 328         $query_cgi .= '&' if $query_cgi;
 329         $query_cgi .= 'idx=' . uri_escape_utf8( $index // '') . '&q=' . uri_escape_utf8( $oand );
 330         $query_cgi .= '&op=' . uri_escape_utf8( $otor ) if $otor;
 331     }
 332     $query_cgi .= '&scan=1' if ( $scan );
 333
 334     my $simple_query;
 335     $simple_query = $operands->[0] if @$operands == 1;
 336     my $query_desc;
 337     if ( $simple_query ) {
 338         $query_desc = $simple_query;
 339     } else {
 340         $query_desc = $search_param_query_str;
 341     }
 342     my $limit     = $self->_join_queries( $self->_convert_index_strings(@$limits));
 343     my $limit_cgi = ( $orig_limits and @$orig_limits )
 344       ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
 345       : '';
 346     my $limit_desc;
 347     $limit_desc = "$limit" if $limit;
 348
 349     return (
 350         undef,  $query,     $simple_query, $query_cgi, $query_desc,
 351         $limit, $limit_cgi, $limit_desc,   undef,      undef
 352     );
 353 }
 354
 355 =head2 build_authorities_query
 356
 357     my $query = $builder->build_authorities_query(\%search);
 358
 359 This takes a nice description of an authority search and turns it into a black-box
 360 query that can then be passed to the appropriate searcher.
 361
 362 The search description is a hashref that looks something like:
 363
 364     {
 365         searches => [
 366             {
 367                 where    => 'Heading',    # search the main entry
 368                 operator => 'exact',        # require an exact match
 369                 value    => 'frogs',        # the search string
 370             },
 371             {
 372                 where    => '',             # search all entries
 373                 operator => '',             # default keyword, right truncation
 374                 value    => 'pond',
 375             },
 376         ],
 377         sort => {
 378             field => 'Heading',
 379             order => 'desc',
 380         },
 381         authtypecode => 'TOPIC_TERM',
 382     }
 383
 384 =cut
 385
 386 sub build_authorities_query {
 387     my ( $self, $search ) = @_;
 388
 389     # Start by making the query parts
 390     my @query_parts;
 391
 392     foreach my $s ( @{ $search->{searches} } ) {
 393         my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
 394         if ( defined $op && ($op eq 'is' || $op eq '=' || $op eq 'exact') ) {
 395             if ($wh) {
 396                 # Match the whole field, case insensitive, UTF normalized.
 397                 push @query_parts, { term => { "$wh.ci_raw" => $val } };
 398             }
 399             else {
 400                 # Match the whole field for all searchable fields, case insensitive,
 401                 # UTF normalized.
 402                 # Given that field data is "The quick brown fox"
 403                 # "The quick brown fox" and "the quick brown fox" will match
 404                 # but not "quick brown fox".
 405                 push @query_parts, {
 406                     multi_match => {
 407                         query => $val,
 408                         fields => $self->_search_fields({ subfield => 'ci_raw' }),
 409                     }
 410                 };
 411             }
 412         }
 413         elsif ( defined $op && $op eq 'start') {
 414             # Match the prefix within a field for all searchable fields.
 415             # Given that field data is "The quick brown fox"
 416             # "The quick bro" will match, but not "quick bro"
 417
 418             # Does not seems to be a multi prefix query
 419             # so we need to create one
 420             if ($wh) {
 421                 # Match prefix of the field.
 422                 push @query_parts, { prefix => {"$wh.ci_raw" => $val} };
 423             }
 424             else {
 425                 my @prefix_queries;
 426                 foreach my $field (@{$self->_search_fields()}) {
 427                     push @prefix_queries, {
 428                         prefix => { "$field.ci_raw" => $val }
 429                     };
 430                 }
 431                 push @query_parts, {
 432                     'bool' => {
 433                         'should' => \@prefix_queries,
 434                         'minimum_should_match' => 1
 435                     }
 436                 };
 437             }
 438         }
 439         else {
 440             # Query all searchable fields.
 441             # Given that field data is "The quick brown fox"
 442             # a search containing any of the words will match, regardless
 443             # of order.
 444
 445             my @tokens = $self->_split_query( $val );
 446             foreach my $token ( @tokens ) {
 447                 $token = $self->_truncate_terms(
 448                     $self->clean_search_term( $token )
 449                 );
 450             }
 451             my $query = $self->_join_queries( @tokens );
 452             my $query_string = {
 453                 query            => $query,
 454                 lenient          => JSON::true,
 455                 analyze_wildcard => JSON::true,
 456             };
 457             if ($wh) {
 458                 $query_string->{default_field} = $wh;
 459             }
 460             else {
 461                 $query_string->{fields} = $self->_search_fields();
 462             }
 463             push @query_parts, { query_string => $query_string };
 464         }
 465     }
 466
 467     # Merge the query parts appropriately
 468     # 'should' behaves like 'or'
 469     # 'must' behaves like 'and'
 470     # Zebra behaviour seem to match must so using that here
 471     my $elastic_query = {};
 472     $elastic_query->{bool}->{must} = \@query_parts;
 473
 474     # Filter by authtypecode if set
 475     if ($search->{authtypecode}) {
 476         $elastic_query->{bool}->{filter} = {
 477             term => {
 478                 "authtype.raw" => $search->{authtypecode}
 479             }
 480         };
 481     }
 482
 483     my $query = {
 484         query => $elastic_query
 485     };
 486
 487     # Add the sort stuff
 488     $query->{sort} = [ $search->{sort} ] if exists $search->{sort};
 489
 490     return $query;
 491 }
 492
 493 =head2 build_authorities_query_compat
 494
 495     my ($query) =
 496       $builder->build_authorities_query_compat( \@marclist, \@and_or,
 497         \@excluding, \@operator, \@value, $authtypecode, $orderby );
 498
 499 This builds a query for searching for authorities, in the style of
 500 L<C4::AuthoritiesMarc::SearchAuthorities>.
 501
 502 Arguments:
 503
 504 =over 4
 505
 506 =item marclist
 507
 508 An arrayref containing where the particular term should be searched for.
 509 Options are: mainmainentry, mainentry, match, match-heading, see-from, and
 510 thesaurus. If left blank, any field is used.
 511
 512 =item and_or
 513
 514 Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.
 515
 516 =item excluding
 517
 518 Also ignored.
 519
 520 =item operator
 521
 522 What form of search to do. Options are: is (phrase, no truncation, whole field
 523 must match), = (number exact match), exact (phrase, no truncation, whole field
 524 must match). If left blank, then word list, right truncated, anywhere is used.
 525
 526 =item value
 527
 528 The actual user-provided string value to search for.
 529
 530 =item authtypecode
 531
 532 The authority type code to search within. If blank, then all will be searched.
 533
 534 =item orderby
 535
 536 The order to sort the results by. Options are Relevance, HeadingAsc,
 537 HeadingDsc, AuthidAsc, AuthidDsc.
 538
 539 =back
 540
 541 marclist, operator, and value must be the same length, and the values at
 542 index /i/ all relate to each other.
 543
 544 This returns a query, which is a black box object that can be passed to the
 545 appropriate search object.
 546
 547 =cut
 548
 549 our $koha_to_index_name = {
 550     mainmainentry   => 'heading-main',
 551     mainentry       => 'heading',
 552     match           => 'match',
 553     'match-heading' => 'match-heading',
 554     'see-from'      => 'match-heading-see-from',
 555     thesaurus       => 'subject-heading-thesaurus',
 556     any             => '',
 557     all             => ''
 558 };
 559
 560 sub build_authorities_query_compat {
 561     my ( $self, $marclist, $and_or, $excluding, $operator, $value,
 562         $authtypecode, $orderby )
 563       = @_;
 564
 565     # This turns the old-style many-options argument form into a more
 566     # extensible hash form that is understood by L<build_authorities_query>.
 567     my @searches;
 568     my $mappings = $self->get_elasticsearch_mappings();
 569
 570     # Convert to lower case
 571     $marclist = [map(lc, @{$marclist})];
 572     $orderby  = lc $orderby;
 573
 574     my @indexes;
 575     # Make sure everything exists
 576     foreach my $m (@$marclist) {
 577
 578         $m = exists $koha_to_index_name->{$m} ? $koha_to_index_name->{$m} : $m;
 579         push @indexes, $m;
 580         warn "Unknown search field $m in marclist" unless (defined $mappings->{data}->{properties}->{$m} || $m eq '' || $m eq 'match-heading');
 581     }
 582     for ( my $i = 0 ; $i < @$value ; $i++ ) {
 583         next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches
 584         push @searches,
 585           {
 586             where    => $indexes[$i],
 587             operator => $operator->[$i],
 588             value    => $value->[$i],
 589           };
 590     }
 591
 592     my %sort;
 593     my $sort_field =
 594         ( $orderby =~ /^heading/ ) ? 'heading__sort'
 595       : ( $orderby =~ /^auth/ )    ? 'local-number__sort'
 596       :                              undef;
 597     if ($sort_field) {
 598         my $sort_order = ( $orderby =~ /asc$/ ) ? 'asc' : 'desc';
 599         %sort = ( $sort_field => $sort_order, );
 600     }
 601     my %search = (
 602         searches     => \@searches,
 603         authtypecode => $authtypecode,
 604     );
 605     $search{sort} = \%sort if %sort;
 606     my $query = $self->build_authorities_query( \%search );
 607     return $query;
 608 }
 609
 610 =head2 _build_scan_query
 611
 612     my ($query, $query_str) = $builder->_build_scan_query(\@operands, \@indexes)
 613
 614 This will build an aggregation scan query that can be issued to elasticsearch from
 615 the provided string input.
 616
 617 =cut
 618
 619 our %scan_field_convert = (
 620     'ti' => 'title',
 621     'au' => 'author',
 622     'su' => 'subject',
 623     'se' => 'title-series',
 624     'pb' => 'publisher',
 625 );
 626
 627 sub _build_scan_query {
 628     my ( $self, $operands, $indexes ) = @_;
 629
 630     my $term = scalar( @$operands ) == 0 ? '' : $operands->[0];
 631     my $index = scalar( @$indexes ) == 0 ? 'subject' : $indexes->[0];
 632
 633     my ( $f, $d ) = split( /,/, $index);
 634     $index = $scan_field_convert{$f} || $f;
 635
 636     my $res;
 637     $res->{query} = {
 638         query_string => {
 639             query => '*'
 640         }
 641     };
 642     $res->{aggregations} = {
 643         $index => {
 644             terms => {
 645                 field => $index . '__facet',
 646                 order => { '_key' => 'asc' },
 647                 include => $self->_create_regex_filter($self->clean_search_term($term)) . '.*'
 648             }
 649         }
 650     };
 651     return ($res, $term);
 652 }
 653
 654 =head2 _create_regex_filter
 655
 656     my $filter = $builder->_create_regex_filter('term')
 657
 658 This will create a regex filter that can be used with an aggregation query.
 659
 660 =cut
 661
 662 sub _create_regex_filter {
 663     my ($self, $term) = @_;
 664
 665     my $result = '';
 666     foreach my $c (split(//, quotemeta($term))) {
 667         my $lc = lc($c);
 668         my $uc = uc($c);
 669         $result .= $lc ne $uc ? '[' . $lc . $uc . ']' : $c;
 670     }
 671     return $result;
 672 }
 673
 674 =head2 _convert_sort_fields
 675
    my @sort_params = $self->_convert_sort_fields(@sort_by);
 677
 678 Converts the zebra-style sort index information into elasticsearch-style.
 679
 680 C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
 681 something that can be sent to L<build_query>.
 682
 683 =cut
 684
 685 sub _convert_sort_fields {
 686     my ( $self, @sort_by ) = @_;
 687
 688     # Turn the sorting into something we care about.
 689     my %sort_field_convert = (
 690         acqdate     => 'date-of-acquisition',
 691         author      => 'author',
 692         call_number => 'cn-sort',
 693         popularity  => 'issues',
 694         relevance   => undef,       # default
 695         title       => 'title',
 696         pubdate     => 'date-of-publication',
 697     );
 698     my %sort_order_convert =
 699       ( qw( desc desc ), qw( dsc desc ), qw( asc asc ), qw( az asc ), qw( za desc ) );
 700
 701     # Convert the fields and orders, drop anything we don't know about.
 702     grep { $_->{field} } map {
 703         my ( $f, $d ) = /(.+)_(.+)/;
 704         {
 705             field     => $sort_field_convert{$f},
 706             direction => $sort_order_convert{$d}
 707         }
 708     } @sort_by;
 709 }
 710
 711 sub _convert_index_fields {
 712     my ( $self, @indexes ) = @_;
 713
 714     my %index_type_convert =
 715       ( __default => undef, phr => 'phrase', rtrn => 'right-truncate', 'st-year' => 'st-year' );
 716
 717     @indexes = grep { $_ ne q{} } @indexes; # Remove any blank indexes, i.e. keyword
 718
 719     # Convert according to our table, drop anything that doesn't convert.
 720     # If a field starts with mc- we save it as it's used (and removed) later
 721     # when joining things, to indicate we make it an 'OR' join.
 722     # (Sorry, this got a bit ugly after special cases were found.)
 723     map {
 724         # Lower case all field names
 725         my ( $f, $t ) = map(lc, split /,/);
 726         my $mc = '';
 727         if ($f =~ /^mc-/) {
 728             $mc = 'mc-';
 729             $f =~ s/^mc-//;
 730         }
 731         my $r = {
 732             field => exists $index_field_convert{$f} ? $index_field_convert{$f} : $f,
 733             type  => $index_type_convert{ $t // '__default' }
 734         };
 735         $r->{field} = ($mc . $r->{field}) if $mc && $r->{field};
 736         $r->{field} || $r->{type} ? $r : undef;
 737     } @indexes;
 738 }
 739
 740 =head2 _convert_index_strings
 741
 742     my @searches = $self->_convert_index_strings(@searches);
 743
 744 Similar to L<_convert_index_fields>, this takes strings of the form
 745 B<field:search term> and rewrites the field from zebra-style to
 746 elasticsearch-style. Anything it doesn't understand is returned verbatim.
 747
 748 =cut
 749
 750 sub _convert_index_strings {
 751     my ( $self, @searches ) = @_;
 752     my @res;
 753     foreach my $s (@searches) {
 754         next if $s eq '';
 755         my ( $field, $term ) = $s =~ /^\s*([\w,-]*?):(.*)/;
 756         unless ( defined($field) && defined($term) ) {
 757             push @res, $s;
 758             next;
 759         }
 760         my ($conv) = $self->_convert_index_fields($field);
 761         unless ( defined($conv) ) {
 762             push @res, $s;
 763             next;
 764         }
 765         push @res, ($conv->{field} ? $conv->{field} . ':' : '')
 766             . $self->_modify_string_by_type( %$conv, operand => $term );
 767     }
 768     return @res;
 769 }
 770
 771 =head2 _convert_index_strings_freeform
 772
 773     my $search = $self->_convert_index_strings_freeform($search);
 774
 775 This is similar to L<_convert_index_strings>, however it'll search out the
 776 things to change within the string. So it can handle strings such as
 777 C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
 778
 779 If there is something of the form "su,complete-subfield" or something, the
 780 second part is stripped off as we can't yet handle that. Making it work
 781 will have to wait for a real query parser.
 782
 783 =cut
 784
 785 sub _convert_index_strings_freeform {
 786     my ( $self, $search ) = @_;
 787     # @TODO: Currenty will alter also fields contained within quotes:
 788     # `searching for "stuff cn:123"` for example will become
 789     # `searching for "stuff local-number:123"
 790     #
 791     # Fixing this is tricky, one possibility:
 792     # https://stackoverflow.com/questions/19193876/perl-regex-to-match-a-string-that-is-not-enclosed-in-quotes
 793     # Still not perfect, and will not handle escaped quotes within quotes and assumes balanced quotes.
 794     #
 795     # Another, not so elegant, solution could be to replace all quoted content with placeholders, and put
 796     # them back when processing is done.
 797
 798     # Lower case field names
 799     $search =~ s/($field_name_pattern)(?:,[\w-]*)?($multi_field_pattern):/\L$1\E$2:/og;
 800     # Resolve possible field aliases
 801     $search =~ s/($field_name_pattern)($multi_field_pattern):/(exists $index_field_convert{$1} ? $index_field_convert{$1} : $1)."$2:"/oge;
 802     return $search;
 803 }
 804
 805 =head2 _modify_string_by_type
 806
 807     my $str = $self->_modify_string_by_type(%index_field);
 808
 809 If you have a search term (operand) and a type (phrase, right-truncated), this
 810 will convert the string to have the function in lucene search terms, e.g.
 811 wrapping quotes around it.
 812
 813 =cut
 814
 815 sub _modify_string_by_type {
 816     my ( $self, %idx ) = @_;
 817
 818     my $type = $idx{type} || '';
 819     my $str = $idx{operand};
 820     return $str unless $str;    # Empty or undef, we can't use it.
 821
 822     $str .= '*' if $type eq 'right-truncate';
 823     $str = '"' . $str . '"' if $type eq 'phrase' && $str !~ /^".*"$/;
 824     if ($type eq 'st-year') {
 825         if ($str =~ /^(.*)-(.*)$/) {
 826             my $from = $1 || '*';
 827             my $until = $2 || '*';
 828             $str = "[$from TO $until]";
 829         }
 830     }
 831     return $str;
 832 }
 833
 834 =head2 _join_queries
 835
 836     my $query_str = $self->_join_queries(@query_parts);
 837
 838 This takes a list of query parts, that might be search terms on their own, or
 839 booleaned together, or specifying fields, or whatever, wraps them in
 840 parentheses, and ANDs them all together. Suitable for feeding to the ES
 841 query string query.
 842
 843 Note: doesn't AND them together if they specify an index that starts with "mc"
 844 as that was a special case in the original code for dealing with multiple
 845 choice options (you can't search for something that has an itype of A and
 846 an itype of B otherwise.)
 847
 848 =cut
 849
 850 sub _join_queries {
 851     my ( $self, @parts ) = @_;
 852
 853     my @norm_parts = grep { defined($_) && $_ ne '' && $_ !~ /^mc-/ } @parts;
 854     my @mc_parts =
 855       map { s/^mc-//r } grep { defined($_) && $_ ne '' && $_ =~ /^mc-/ } @parts;
 856     return () unless @norm_parts + @mc_parts;
 857     return ( @norm_parts, @mc_parts )[0] if @norm_parts + @mc_parts == 1;
 858
 859     # Group limits by field, so they can be OR'ed together
 860     my %mc_limits;
 861     foreach my $mc_part (@mc_parts) {
 862         my ($field, $value) = split /:/, $mc_part, 2;
 863         $mc_limits{$field} //= [];
 864         push @{ $mc_limits{$field} }, $value;
 865     }
 866
 867     @mc_parts = map {
 868         sprintf('%s:(%s)', $_, join (' OR ', @{ $mc_limits{$_} }));
 869     } sort keys %mc_limits;
 870
 871     @norm_parts = map { "($_)" } @norm_parts;
 872
 873     return join( ' AND ', @norm_parts, @mc_parts);
 874 }
 875
 876 =head2 _make_phrases
 877
 878     my @phrased_queries = $self->_make_phrases(@query_parts);
 879
 880 This takes the supplied queries and forces them to be phrases by wrapping
 881 quotes around them. It understands field prefixes, e.g. 'subject:' and puts
 882 the quotes outside of them if they're there.
 883
 884 =cut
 885
 886 sub _make_phrases {
 887     my ( $self, @parts ) = @_;
 888     map { s/^\s*(\w*?:)(.*)$/$1"$2"/r } @parts;
 889 }
 890
 891 =head2 _create_query_string
 892
 893     my @query_strings = $self->_create_query_string(@queries);
 894
 895 Given a list of hashrefs, it will turn them into a lucene-style query string.
 896 The hash should contain field, type (both for the indexes), operator, and
 897 operand.
 898
 899 =cut
 900
 901 sub _create_query_string {
 902     my ( $self, @queries ) = @_;
 903
 904     map {
 905         my $otor  = $_->{operator} ? $_->{operator} . ' ' : '';
 906         my $field = $_->{field}    ? $_->{field} . ':'    : '';
 907
 908         my $oand = $self->_modify_string_by_type(%$_);
 909         $oand = "($oand)" if $field && scalar(split(/\s+/, $oand)) > 1 && (!defined $_->{type} || $_->{type} ne 'st-year');
 910         "$otor($field$oand)";
 911     } @queries;
 912 }
 913
 914 =head2 clean_search_term
 915
 916     my $term = $self->clean_search_term($term);
 917
 918 This cleans a search term by removing any funny characters that may upset
 919 ES and give us an error. It also calls L<_convert_index_strings_freeform>
 920 to ensure those parts are correct.
 921
 922 =cut
 923
 924 sub clean_search_term {
 925     my ( $self, $term ) = @_;
 926
 927     # Lookahead for checking if we are inside quotes
 928     my $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
 929
 930     # Some hardcoded searches (like with authorities) produce things like
 931     # 'an=123', when it ought to be 'an:123' for our purposes.
 932     $term =~ s/=/:/g;
 933
 934     $term = $self->_convert_index_strings_freeform($term);
 935
 936     # Remove unbalanced quotes
 937     my $unquoted = $term;
 938     my $count = ($unquoted =~ tr/"/ /);
 939     if ($count % 2 == 1) {
 940         $term = $unquoted;
 941     }
 942     $term = $self->_query_regex_escape_process($term);
 943
 944     # because of _truncate_terms and if QueryAutoTruncate enabled
 945     # we will have any special operators ruined by _truncate_terms:
 946     # for ex. search for "test [6 TO 7]" will be converted to "test* [6* TO* 7]"
 947     # so no reason to keep ranges in QueryAutoTruncate==true case:
 948     my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
 949     unless($truncate) {
 950         # replace all ranges with any square/curly brackets combinations to temporary substitutions (ex: "{a TO b]"" -> "~~LC~~a TO b~~RS~~")
 951         # (where L is for left and C is for Curly and so on)
 952         $term =~ s/
 953             (?<!\\)
 954             (?<backslashes>(?:[\\]{2})*)
 955             (?<leftbracket>\{|\[)
 956             (?<ranges>
 957                 [^\s\[\]\{\}]+\ TO\ [^\s\[\]\{\}]+
 958                 (?<!\\)
 959                 (?:[\\]{2})*
 960             )
 961             (?<rightbracket>\}|\])
 962         /$+{backslashes}.'~~L'.($+{leftbracket} eq '[' ? 'S':'C').'~~'.$+{ranges}.'~~R'.($+{rightbracket} eq ']' ? 'S':'C').'~~'/gex;
 963     }
 964     # save all regex contents away before escaping brackets:
 965     # (same trick as with brackets above, just RE for 'RegularExpression')
 966     my @saved_regexes;
 967     my $rgx_i = 0;
 968     while(
 969             $term =~ s@(
 970                 (?<!\\)(?:[\\]{2})*/
 971                 (?:[^/]+|(?<=\\)(?:[\\]{2})*/)+
 972                 (?<!\\)(?:[\\]{2})*/
 973             )$lookahead@~~RE$rgx_i~~@x
 974     ) {
 975         @saved_regexes[$rgx_i++] = $1;
 976     }
 977
 978     # remove leading and trailing colons mixed with optional slashes and spaces
 979     $term =~ s/^([\s\\]*:\s*)+//;
 980     $term =~ s/([\s\\]*:\s*)+$//;
 981     # remove unquoted colons that have whitespace on either side of them
 982     $term =~ s/([\s\\]*:\s*)+(\s+)$lookahead/$2/g;
 983     $term =~ s/(\s+)([\s\\]*:\s*)+$lookahead/$1/g;
 984     # replace with spaces all repeated colons no matter how they surrounded with spaces and slashes
 985     $term =~ s/([\s\\]*:\s*){2,}$lookahead/ /g;
 986     # screen all followups for colons after first colon,
 987     # and correctly ignore unevenly backslashed:
 988     $term =~ s/((?<!\\)(?:[\\]{2})*:[^:\s]+(?<!\\)(?:[\\]{2})*)(?=:)/$1\\/g;
 989
 990     # screen all exclamation signs that either are the last symbol or have white space after them
 991     # or are followed by close parentheses
 992     $term =~ s/(?:[\s\\]*!\s*)+(\s|$|\))/$1/g;
 993
 994     # screen all brackets with backslash
 995     $term =~ s/(?<!\\)(?:[\\]{2})*([\{\}\[\]])$lookahead/\\$1/g;
 996
 997     # restore all regex contents after escaping brackets:
 998     for (my $i = 0; $i < @saved_regexes; $i++) {
 999         $term =~ s/~~RE$i~~/$saved_regexes[$i]/;
1000     }
1001     unless($truncate) {
1002         # restore temporary weird substitutions back to normal brackets
1003         $term =~ s/~~L(C|S)~~([^\s\[\]\{\}]+ TO [^\s\[\]\{\}]+)~~R(C|S)~~/($1 eq 'S' ? '[':'{').$2.($3 eq 'S' ? ']':'}')/ge;
1004     }
1005     return $term;
1006 }
1007
1008 =head2 _query_regex_escape_process
1009
1010     my $query = $self->_query_regex_escape_process($query);
1011
1012 Processes query in accordance with current "QueryRegexEscapeOptions" system preference setting.
1013
1014 =cut
1015
sub _query_regex_escape_process {
    my ($self, $query) = @_;

    # The "QueryRegexEscapeOptions" system preference selects the mode;
    # 'dont_escape' (and any unrecognised value) leaves the query untouched.
    my $mode = C4::Context->preference("QueryRegexEscapeOptions");
    return $query if $mode eq 'dont_escape';

    # @TODO: both substitutions assume quotes are always balanced and will
    # not handle escaped quotes properly; they should perhaps be replaced
    # with a more general parser solution so that this function is only
    # ever provided with unquoted query parts.
    if ( $mode eq 'escape' ) {
        # Escape unescaped slashes (/), leaving slashes within quotes alone.
        $query =~ s@(?:(?<!\\)((?:[\\]{2})*)(?=/))(?![^"]*"(?:[^"]*"[^"]*")*[^"]*$)@\\$1@g;
    }
    elsif ( $mode eq 'unescape_escaped' ) {
        # Unescape escaped slashes (\/) and escape unescaped slashes (/),
        # leaving slashes within quotes alone.
        $query =~ s@(?:(?<!\\)(?:((?:[\\]{2})*[\\])|((?:[\\]{2})*))(?=/))(?![^"]*"(?:[^"]*"[^"]*")*[^"]*$)@($1 ? substr($1, 0, -1) : ($2 . "\\"))@ge;
    }
    return $query;
}
1039
1040 =head2 _fix_limit_special_cases
1041
1042     my $limits = $self->_fix_limit_special_cases($limits);
1043
1044 This converts any special cases that the limit specifications have into things
1045 that are more readily processable by the rest of the code.
1046
1047 The argument should be an arrayref, and it'll return an arrayref.
1048
1049 =cut
1050
sub _fix_limit_special_cases {
    my ( $self, $limits ) = @_;

    # Takes an arrayref of limit strings and returns a new arrayref in which
    # the special cases below have been rewritten into query string clauses.
    my @new_lim;
    foreach my $limit (@$limits) {

        # Year range. This is set up by opac-search.pl
        if ( $limit =~ /^yr,st-numeric,ge=/ ) {
            my ( $start, $end ) =
              ( $limit =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/ );
            next unless defined($start) && defined($end);
            push @new_lim, "date-of-publication:[$start TO $end]";
        }

        # Single year, or a "from-until" range in st-year notation.
        elsif ( $limit =~ /^yr,st-numeric=/ ) {
            my ($date) = ( $limit =~ /^yr,st-numeric=(.*)$/ );
            next unless defined($date);
            $date = $self->_modify_string_by_type(type => 'st-year', operand => $date);
            push @new_lim, "date-of-publication:$date";
        }

        # Library limits: a single branch or every library of a group.
        elsif ( $limit =~ /multibranchlimit|^branch/ ) {
            my $branchfield  = C4::Context->preference('SearchLimitLibrary');
            my @branchcodes;
            if ( $limit =~ /multibranchlimit/ ) {
                my ($group_id) = ( $limit =~ /^multibranchlimit:(.*)$/ );

                # NOTE(review): the method call below dies if find() returns
                # undef for an unknown group id - confirm this is intended.
                my $search_group = Koha::Library::Groups->find( $group_id );
                @branchcodes = map { $_->branchcode } $search_group->all_libraries;
                @branchcodes = sort { $a cmp $b } @branchcodes;
            }
            else {
                @branchcodes = ( $limit =~ /^branch:(.*)$/ );
            }

            if (@branchcodes) {

                # Any preference value other than 'homebranch' or
                # 'holdingbranch' limits on both fields.
                my @branch_fields = ( 'homebranch', 'holdingbranch' );
                if ( $branchfield eq "homebranch" ) {
                    @branch_fields = ('homebranch');
                }
                elsif ( $branchfield eq "holdingbranch" ) {
                    @branch_fields = ('holdingbranch');
                }

                my @clauses;
                foreach my $branch_field (@branch_fields) {
                    push @clauses, map { $branch_field . ': ' . $_ } @branchcodes;
                }
                push @new_lim, sprintf "(%s)", join " OR ", @clauses;
            }
        }
        elsif ( $limit =~ /^available$/ ) {
            push @new_lim, 'onloan:false';
        }
        else {
            my ( $field, $term ) = $limit =~ /^\s*([\w,-]*?):(.*)/;
            if ( defined($field) && defined($term) ) {

                # We are quoting all the limits as phrase, this prevents from
                # quoting again later. Done only once we know the limit had a
                # "field:" prefix, so $field is never undef here.
                $field =~ s/,phr$//;
                push @new_lim, "$field:(\"$term\")";
            }
            else {
                # No "field:" prefix: pass the limit through untouched.
                push @new_lim, $limit;
            }
        }
    }
    return \@new_lim;
}
1112
1113 =head2 _sort_field
1114
1115     my $field = $self->_sort_field($field);
1116
1117 Given a field name, this works out what the actual name of the field to sort
1118 on should be. A '__sort' suffix is added for fields with a sort version, and
1119 for text fields either '.phrase' (for sortable versions) or '.raw' is appended
1120 to avoid sorting on a tokenized value.
1121
1122 =cut
1123
sub _sort_field {
    my ($self, $f) = @_;

    # Look up the mapped type of the field; only 'text' fields may get the
    # '.raw' suffix below.
    my $mappings   = $self->get_elasticsearch_mappings();
    my $field_type = $mappings->{data}{properties}{$f}{type};
    my $is_text    = defined $field_type && $field_type eq 'text';

    # A field whose sort setting is undefined or true sorts on its '__sort'
    # version.
    my $sortable = $self->sort_fields()->{$f};
    return $f . '__sort' if !defined $sortable || $sortable;

    # We need to add '.raw' to text fields without a sort field,
    # otherwise it'll sort based on the tokenised form.
    return $is_text ? $f . '.raw' : $f;
}
1138
1139 =head2 _truncate_terms
1140
1141     my $query = $self->_truncate_terms($query);
1142
1143 Given a string query this function appends '*' wildcard  to all terms except
1144 operands and double quoted strings.
1145
1146 =cut
1147
sub _truncate_terms {
    my ( $self, $query ) = @_;

    # Boolean keywords never get a wildcard (compared case-insensitively).
    my %is_operator = map { $_ => 1 } qw( and or not );

    my @terms;
    foreach my $token ( $self->_split_query($query) ) {

        # Skip empty / whitespace-only tokens
        next if $token =~ /^\s*$/;

        # Append '*' only to tokens that end in a word character and are not
        # a keyword; e.g. a quoted string ends in '"' and is kept as is.
        my $keep_as_is = $token =~ /\W$/ || $is_operator{ lc $token };
        push @terms, $keep_as_is ? $token : "$token*";
    }

    return join ' ', @terms;
}
1164
1165 =head2 _split_query
1166
1167     my @token = $self->_split_query($query_str);
1168
1169 Given a string query this function splits it to tokens taking into account
1170 any field prefixes and quoted strings.
1171
1172 =cut
1173
my $tokenize_split_re = qr/((?:${field_name_pattern}${multi_field_pattern}:)?"[^"]+"|\s+)/;

sub _split_query {
    my ( $self, $query ) = @_;

    # The separator pattern is captured, so split() also returns the quoted
    # strings (with an optional field prefix) and the whitespace runs it
    # split on. For example '"donald duck" title:"the mouse" and peter' is
    # split into
    # ['', '"donald duck"', '', ' ', '', 'title:"the mouse"', '', ' ', 'and', ' ', 'peter']
    # and only the elements containing a non-whitespace character are kept.
    return grep { /\S/ } split $tokenize_split_re, $query;
}
1188
1189 =head2 _search_fields
1190     my $weighted_fields = $self->_search_fields({
1191         is_opac => 0,
1192         weighted_fields => 1,
1193         subfield => 'raw'
1194     });
1195
1196 Generate a list of searchable fields to be used for Elasticsearch queries
1197 applied to multiple fields.
1198
1199 Returns an arrayref of field names for either OPAC or staff interface, with
1200 possible weights and subfield appended to each field name depending on the
1201 options provided.
1202
1203 =over 4
1204
1205 =item C<$params>
1206
1207 Hashref with options. The parameter C<is_opac> indicates whether the searchable
1208 fields for OPAC or staff interface should be retrieved. If C<weighted_fields> is set
1209 fields weights will be applied on returned fields. C<subfield> can be used to
1210 provide a subfield that will be appended to fields as "C<field_name>.C<subfield>".
1211
1212 =back
1213
1214 =cut
1215
sub _search_fields {
    my ($self, $params) = @_;

    # Returns an arrayref of searchable field names for the OPAC or the staff
    # interface (see the is_opac option), each optionally suffixed with
    # ".<subfield>" and/or "^<weight>".
    $params //= {
        is_opac => 0,
        weighted_fields => 0,
        whole_record => 0,
        # This is a hack for authorities build_authorities_query
        # can hopefully be removed in the future
        subfield => undef,
    };

    # The [name, weight] pairs are cached per interface and per index; the
    # weight element is left out for fields that have no (true) weight.
    my $cache = Koha::Caches->get_instance();
    my $cache_key = 'elasticsearch_search_fields' . ($params->{is_opac} ? '_opac' : '_staff_client') . "_" . $self->index;
    my $search_fields = $cache->get_from_cache($cache_key, { unsafe => 1 });
    if (!$search_fields) {
        # The reason we don't use Koha::SearchFields->search here is we don't
        # want or need resultset wrapped as Koha::SearchField object.
        # It does not make any sense in this context and would cause
        # unnecessary overhead since we are only querying for data
        # Also would not work, or produce strange results, with the "columns"
        # option.
        my $schema = Koha::Database->schema;
        my $result = $schema->resultset('SearchField')->search(
            {
                $params->{is_opac} ? (
                    'opac' => 1,
                ) : (
                    'staff_client' => 1
                ),
                'type' => { '!=' => 'boolean' },
                'search_marc_map.index_name' => $self->index,
                'search_marc_map.marc_type' => C4::Context->preference('marcflavour'),
                'search_marc_to_fields.search' => 1,
            },
            {
                columns => [qw/name weight/],
                collapse => 1,
                join => {search_marc_to_fields => 'search_marc_map'},
            }
        );
        my @search_fields;
        while (my $search_field = $result->next) {
            push @search_fields, [
                lc $search_field->name,
                $search_field->weight ? $search_field->weight : ()
            ];
        }
        $search_fields = \@search_fields;
        $cache->set_in_cache($cache_key, $search_fields);
    }
    if ($params->{subfield}) {
        my $subfield = $params->{subfield};
        $search_fields = [
            map {
                # Copy values to avoid mutating cached
                # data (since unsafe is used).
                # The weight is carried over only when the field has one:
                # an explicit undef would later be joined into a dangling
                # "field.subfield^".
                my ($field, @weight) = @{$_};
                ["${field}.${subfield}", @weight];
            } @{$search_fields}
        ];
    }
    if ($params->{weighted_fields}) {
        # "name^weight" for weighted fields, plain "name" otherwise
        return [map { join('^', @{$_}) } @{$search_fields}];
    }
    else {
        # Exclude weight from field
        return [map { $_->[0] } @{$search_fields}];
    }
}
1284
1285 1;