1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
3 # This file is part of Koha.
5 # Copyright 2014 Catalyst IT Ltd.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
=head1 NAME

Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
query objects from user-supplied queries

=head1 DESCRIPTION

This provides the functions that take a user-supplied search query and
turn it into something that can be given to elasticsearch to get answers.

=head1 SYNOPSIS

    use Koha::SearchEngine::Elasticsearch::QueryBuilder;
    $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
    my $simple_query = $builder->build_query("hello");
    # This is currently undocumented because the original code is undocumented
    my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);

=head1 METHODS

=cut
42 use base qw(Koha::SearchEngine::Elasticsearch);
45 use List::MoreUtils qw/ each_array /;
=head2 build_query

    my $simple_query = $builder->build_query("hello", %options)

This will build a query that can be issued to elasticsearch from the provided
string input. This expects a lucene style search form (see
L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
for details.)

It'll make an attempt to respect the various query options.

Additional options can be provided with the C<%options> hash.

=over 4

=item sort

This should be an arrayref of hashrefs, each containing a C<field> and an
C<direction> (optional, defaults to C<asc>.) The results will be sorted
according to these values. Valid values for C<direction> are 'asc' and 'desc'.
Any other direction makes this method die.

=item fields

An arrayref that is passed through as the C<fields> parameter of the
C<query_string> query. Defaults to an empty arrayref.

=item expanded_facet

The name of an aggregation whose C<size> should be set to the value of the
C<FacetMaxCount> system preference.

=back

Returns a hashref with the keys C<query>, C<aggregations> and, when sort
options were supplied, C<sort>.

=cut

sub build_query {
    my ( $self, $query, %options ) = @_;

    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    # No query at all means "match everything".
    $query = '*' unless defined $query;

    my $res = {
        query => {
            query_string => {
                query            => $query,
                fuzziness        => $fuzzy_enabled ? 'auto' : '0',
                default_operator => 'AND',
                default_field    => '_all',
                lenient          => JSON::true,
                fields           => $options{fields} || [],
            },
        },
    };

    if ( $options{sort} ) {
        foreach my $sort ( @{ $options{sort} } ) {
            my ( $f, $d ) = @$sort{qw/ field direction /};
            die "Invalid sort direction, $d"
              if $d && ( $d ne 'asc' && $d ne 'desc' );
            $d = 'asc' unless $d;

            # Sort on the dedicated sort/untokenised variant of the field.
            $f = $self->_sort_field($f);
            push @{ $res->{sort} }, { $f => { order => $d } };
        }
    }

    # See _convert_facets in Search.pm for how these get turned into
    # things that Koha can use.
    $res->{aggregations} = {
        author         => { terms => { field => "author__facet" } },
        subject        => { terms => { field => "subject__facet" } },
        itype          => { terms => { field => "itype__facet" } },
        location       => { terms => { field => "location__facet" } },
        'su-geo'       => { terms => { field => "su-geo__facet" } },
        'title-series' => { terms => { field => "title-series__facet" } },
        ccode          => { terms => { field => "ccode__facet" } },
    };

    # The preference may be unset; treat that as "show neither library facet"
    # instead of comparing undef with eq.
    my $display_library_facets =
      C4::Context->preference('DisplayLibraryFacets') // '';
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'home' )
    {
        $res->{aggregations}{homebranch} =
          { terms => { field => "homebranch__facet" } };
    }
    if (   $display_library_facets eq 'both'
        or $display_library_facets eq 'holding' )
    {
        $res->{aggregations}{holdingbranch} =
          { terms => { field => "holdingbranch__facet" } };
    }
    if ( my $ef = $options{expanded_facet} ) {
        $res->{aggregations}{$ef}{terms}{size} =
          C4::Context->preference('FacetMaxCount');
    }

    return $res;
}
=head2 build_browse_query

    my $browse_query = $builder->build_browse_query($field, $query);

This performs a "starts with" style query on a particular field. The field
to be searched must have been indexed with an appropriate mapping as a
"phrase" subfield, which pretty much everything has.

Fields that are not whitelisted fall back to C<title>. When C<$query> is
undefined, C<< { query => '*' } >> is returned.

=cut

# XXX this isn't really a browse query like we want in the end
sub build_browse_query {
    my ( $self, $field, $query ) = @_;

    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;

    return { query => '*' } unless defined $query;

    # TODO this should come from Koha::SearchEngine::Elasticsearch
    my %field_whitelist = map { $_ => 1 } qw( title author );
    $field = 'title' unless exists $field_whitelist{$field};

    my $sort_key = $self->_sort_field($field);

    # Prefix match against the untokenised ".phrase" variant of the field.
    my %prefix_match = (
        query     => $query,
        operator  => 'or',
        fuzziness => $fuzzy_enabled ? 'auto' : '0',
    );
    my $res = {
        query => { match_phrase_prefix => { "$field.phrase" => \%prefix_match } },
        sort  => [ { $sort_key => { order => "asc" } } ],
    };
    return $res;
}
=head2 build_query_compat

    my (
        $error,             $query, $simple_query, $query_cgi,
        $query_desc,        $limit, $limit_cgi,    $limit_desc,
        $stopwords_removed, $query_type
      )
      = $builder->build_query_compat( \@operators, \@operands, \@indexes,
        \@limits, \@sort_by, $scan, $lang, \%params );

This handles a search using the same api as L<C4::Search::buildQuery> does.

A very simple query will go in with C<$operands> set to ['query'], and
C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
C<$query> set to something that can perform the search, C<$simple_query>
set to just the search term, C<$query_cgi> set to something that can
reproduce this search, and C<$query_desc> set to something else.

C<%params> is optional and may contain C<suppress> (adds a C<suppress:0>
limit), C<weighted_fields> (adds the weighted search fields to the query) and
C<expanded_facet> (passed on to L</build_query>).

C<$error>, C<$stopwords_removed> and C<$query_type> are always returned as
undef. None of the arrayrefs passed in are modified.

=cut

sub build_query_compat {
    my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
        $lang, $params )
      = @_;

    $params //= {};

    my @sort_params  = $self->_convert_sort_fields( @{ $sort_by // [] } );
    my @index_params = $self->_convert_index_fields( @{ $indexes // [] } );
    my $limits       = $self->_fix_limit_special_cases($orig_limits);
    push @$limits, "suppress:0" if $params->{suppress};

    # Merge the indexes in with the search terms and the operators so that
    # each search thing is a handy unit. The first operand can't have an
    # operator, so the operator list is shifted by one. This is done on
    # copies so that the caller's arrays are left alone.
    my @operand_list  = @{ $operands // [] };
    my @operator_list = ( undef, @{ $operators // [] } );

    my @search_params;
    my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
    my $ea = each_array( @operand_list, @operator_list, @index_params );
    while ( my ( $oand, $otor, $index ) = $ea->() ) {
        next if ( !defined($oand) || $oand eq '' );
        $oand = $self->_clean_search_term($oand);
        $oand = $self->_truncate_terms($oand) if ($truncate);
        push @search_params, {
            operand  => $oand,                             # the search terms
            operator => defined($otor) ? uc $otor : undef, # AND and so on
            $index ? %$index : (),
        };
    }

    # We build a string query from limits and the queries. An alternative
    # would be to pass them separately into build_query and let it build
    # them into a structured ES query itself.
    my $search_clause =
      join( ' ', $self->_create_query_string(@search_params) );
    my $limit = $self->_join_queries( $self->_convert_index_strings(@$limits) );

    # Falsy parts (undef, empty string) are left out of the join, so there
    # is never a dangling ' AND '.
    my $query_str = join( ' AND ', $search_clause || (), $limit || () );

    my @fields = '_all';
    if ( $params->{weighted_fields} ) {
        push @fields, sprintf( "%s^%s", $_->name, $_->weight )
          for Koha::SearchFields->weighted_fields;
    }

    my %options = (
        fields         => \@fields,
        sort           => \@sort_params,
        expanded_facet => $params->{expanded_facet},
    );
    my $query = $self->build_query( $query_str, %options );

    # We roughly emulate the CGI parameters of the zebra query builder
    my $query_cgi;
    $query_cgi = 'q=' . uri_escape_utf8( $operand_list[0] ) if @operand_list;
    my $simple_query;
    $simple_query = $operand_list[0] if @operand_list == 1;
    my $query_desc = $simple_query;
    my $limit_cgi  = ( $orig_limits and @$orig_limits )
      ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
      : '';
    my $limit_desc;
    $limit_desc = "$limit" if $limit;

    return (
        undef,  $query,     $simple_query, $query_cgi, $query_desc,
        $limit, $limit_cgi, $limit_desc,   undef,      undef
    );
}
=head2 build_authorities_query

    my $query = $builder->build_authorities_query(\%search);

This takes a nice description of an authority search and turns it into a black-box
query that can then be passed to the appropriate searcher.

The search description is a hashref that looks something like:

    {
        searches => [
            {
                where    => 'Heading',    # search the main entry
                operator => 'exact',      # require an exact match
                value    => 'frogs',      # the search string
            },
            {
                where    => '',           # search all entries
                operator => '',           # default keyword, right truncation
                value    => 'pond',
            },
        ],
        sort => {
            heading => 'asc',             # field name => sort order
        },
        authtypecode => 'TOPIC_TERM',
    }

Supported operators are C<is>, C<=> and C<exact> (phrase match), C<start>
(phrase prefix match); anything else is treated as a word list search.
The C<sort> key is optional. The hashref passed in is not modified.

=cut

sub build_authorities_query {
    my ( $self, $search ) = @_;

    # Start by making the query parts
    my @query_parts;

    foreach my $s ( @{ $search->{searches} } ) {
        my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
        $wh = '_all' if !defined $wh || $wh eq '';
        $op //= '';
        if ( $op eq 'is' || $op eq '=' || $op eq 'exact' ) {

            # look for something that matches a term completely
            # note, '=' is about numerical vals. May need special handling.
            # Also, we lowercase our search because the ES
            # index lowercases its values, and term searches don't get the
            # search analyzer applied to them.
            push @query_parts,
              { match_phrase => { "$wh.phrase" => lc $val } };
        }
        elsif ( $op eq 'start' ) {

            # startswith search, uses lowercase untokenized version of heading
            push @query_parts,
              { match_phrase_prefix => { "$wh.phrase" => lc $val } };
        }
        else {
            # regular wordlist stuff
            my @tokens = $self->_split_query($val);
            foreach my $token (@tokens) {
                $token = $self->_truncate_terms(
                    $self->_clean_search_term($token) );
            }
            my $query = $self->_join_queries(@tokens);
            push @query_parts,
              { query_string => { default_field => $wh, query => $query } };
        }
    }

    # Merge the query parts appropriately
    # 'should' behaves like 'or'
    # 'must' behaves like 'and'
    # Zebra results seem to match must so using that here
    my $query = { query => { bool => { must => \@query_parts } } };

    # Add the sort stuff, translating each field name to its sortable
    # variant. A fresh hash is built so the caller's structure is untouched.
    if ( exists $search->{sort} ) {
        my %sort;
        foreach my $k ( keys %{ $search->{sort} } ) {
            $sort{ $self->_sort_field($k) } = $search->{sort}{$k};
        }
        $query->{sort} = [ \%sort ];
    }

    return $query;
}
=head2 build_authorities_query_compat

    my ($query) =
      $builder->build_authorities_query_compat( \@marclist, \@and_or,
        \@excluding, \@operator, \@value, $authtypecode, $orderby );

This builds a query for searching for authorities, in the style of
L<C4::AuthoritiesMarc::SearchAuthorities>.

Arguments:

=over 4

=item marclist

An arrayref containing where the particular term should be searched for.
Options are: mainmainentry, mainentry, match, match-heading, see-from, and
thesaurus. Values are matched case-insensitively. An unknown value raises
a C<Koha::Exceptions::WrongParameter> exception.

=item and_or

Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.

=item excluding

Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.

=item operator

What form of search to do. Options are: is (phrase, no truncation, whole field
must match), = (number exact match), exact (phrase, no truncation, whole field
must match). If left blank, then word list, right truncated, anywhere is used.

=item value

The actual user-provided string value to search for. Undefined and empty
values are skipped.

=item authtypecode

The authority type code to search within. If blank, then all will be searched.

=item orderby

The order to sort the results by. Options are Relevance, HeadingAsc,
HeadingDsc, AuthidAsc, AuthidDsc.

=back

marclist, operator, and value must be the same length, and the values at
index /i/ all relate to each other.

This returns a query, which is a black box object that can be passed to the
appropriate search object.

=cut

# Maps the Koha-style "where" names to the index they are searched in.
# An empty string means "search everything".
our $koha_to_index_name = {
    mainmainentry   => 'heading-main',
    mainentry       => 'heading',
    match           => 'match',
    'match-heading' => 'match-heading',
    'see-from'      => 'match-heading-see-from',
    thesaurus       => 'subject-heading-thesaurus',

    # NOTE(review): 'any' and 'all' are taken from upstream Koha, confirm
    # that they belong in this version of the table.
    any => '',
    all => '',
};

sub build_authorities_query_compat {
    my ( $self, $marclist, $and_or, $excluding, $operator, $value,
        $authtypecode, $orderby )
      = @_;

    # This turns the old-style many-options argument form into a more
    # extensible hash form that is understood by L<build_authorities_query>.
    my @searches;

    # Convert to lower case; a missing order simply means "no sorting".
    $marclist = [ map { lc } @{$marclist} ];
    $orderby  = lc( $orderby // '' );

    # Make sure everything exists
    foreach my $m (@$marclist) {
        Koha::Exceptions::WrongParameter->throw(
            "Invalid marclist field provided: $m")
          unless exists $koha_to_index_name->{$m};
    }

    foreach my $i ( 0 .. $#{$value} ) {

        # Clean empty form values, ES doesn't like undefined searches.
        # "0" is a real search term and is kept.
        next unless defined $value->[$i] && length $value->[$i];
        push @searches,
          {
            where    => $koha_to_index_name->{ $marclist->[$i] },
            operator => $operator->[$i],
            value    => $value->[$i],
          };
    }

    my %sort;
    my $sort_field =
        ( $orderby =~ /^heading/ ) ? 'heading'
      : ( $orderby =~ /^auth/ )    ? 'local-number'
      :                              undef;
    if ($sort_field) {
        my $sort_order = ( $orderby =~ /asc$/ ) ? 'asc' : 'desc';
        %sort = ( $sort_field => $sort_order, );
    }

    my %search = (
        searches     => \@searches,
        authtypecode => $authtypecode,
    );
    $search{sort} = \%sort if %sort;

    return $self->build_authorities_query( \%search );
}
=head2 _convert_sort_fields

    my @sort_params = _convert_sort_fields(@sort_by)

Converts the zebra-style sort index information into elasticsearch-style.

C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
something that can be sent to L<build_query>: a list of hashrefs with the
keys C<field> and C<direction>. Entries whose field is unknown (or maps to
nothing, like C<relevance>) are dropped.

=cut

sub _convert_sort_fields {
    my ( $self, @sort_by ) = @_;

    # Turn the sorting into something we care about.
    my %es_field_for = (
        acqdate     => 'date-of-acquisition',
        author      => 'author',
        call_number => 'local-classification',
        popularity  => 'issues',
        relevance   => undef,                    # default
        title       => 'title',
        pubdate     => 'date-of-publication',
    );
    my %es_order_for = (
        desc => 'desc',
        dsc  => 'desc',
        asc  => 'asc',
        az   => 'asc',
        za   => 'desc',
    );

    # Convert the fields and orders, drop anything we don't know about.
    my @converted;
    foreach my $spec (@sort_by) {

        # "<field>_<order>", split at the last underscore
        my ( $name, $order ) = $spec =~ /(.+)_(.+)/;
        my $field     = $es_field_for{$name};
        my $direction = $es_order_for{$order};
        next unless $field;
        push @converted, { field => $field, direction => $direction };
    }
    return @converted;
}
=head2 _convert_index_fields

    my @index_params = $self->_convert_index_fields(@indexes);

Converts zebra-style search index notation into elasticsearch-style.

C<@indexes> is an array of index names, as presented to L<build_query_compat>,
and it returns something that can be sent to L<build_query>: a list of
hashrefs with the keys C<field> and C<type>.

B<TODO>: this will pull from the elasticsearch mappings table to figure out
types.

=cut

# Zebra index alias => elasticsearch field name. Lookups are single-pass,
# so every value must be the final field name, never another alias.
our %index_field_convert = (
    'lcn'                     => 'local-classification',
    'callnum'                 => 'local-classification',
    'record-type'             => 'rtype',
    'mc-rtype'                => 'rtype',
    'lc-card'                 => 'lc-card-number',
    'sn'                      => 'local-number',
    'yr'                      => 'date-of-publication',
    'pubdate'                 => 'date-of-publication',
    'acqdate'                 => 'date-of-acquisition',
    'date/time-last-modified' => 'date-time-last-modified',
    'dtlm'                    => 'date-time-last-modified',
    'diss'                    => 'dissertation-information',
    'music-number'            => 'identifier-publisher-for-music',
    'number-music-publisher'  => 'identifier-publisher-for-music',
    'music'                   => 'identifier-publisher-for-music',
    'ident'                   => 'identifier-standard',
    'cpn'                     => 'corporate-name',
    'cfn'                     => 'conference-name',
    'pn'                      => 'personal-name',
    'rcn'                     => 'record-control-number',
    'su-to'                   => 'subject',
    #'su-geo' => 'subject',
    'su-ut'                   => 'subject',
    'se'                      => 'title-series',
    'ut'                      => 'title-uniform',
    'an'                      => 'koha-auth-number',
    'rank'                    => 'relevance',
    'phr'                     => 'st-phrase',
    'wrdl'                    => 'st-word-list',
    'rt'                      => 'right-truncation',
    'rtrn'                    => 'right-truncation',
    'ltrn'                    => 'left-truncation',
    'rltrn'                   => 'left-and-right',
    'mc-itemtype'             => 'itemtype',
    'mc-ccode'                => 'ccode',
    'branch'                  => 'homebranch',
    'mc-loc'                  => 'location',
    'stocknumber'             => 'number-local-acquisition',
    'inv'                     => 'number-local-acquisition',
    'mc-itype'                => 'itype',
    'aub'                     => 'author-personal-bibliography',
    'auo'                     => 'author-in-order',
    'frequency-code'          => 'ff8-18',
    'illustration-code'       => 'ff8-18-21',
    'regularity-code'         => 'ff8-19',
    'type-of-serial'          => 'ff8-21',
    'format'                  => 'ff8-23',
    'conference-code'         => 'ff8-29',
    'festschrift-indicator'   => 'ff8-30',
    'index-indicator'         => 'ff8-31',
    'literature-code'         => 'lf',
    'biography'               => 'bio',
    'biography-code'          => 'bio',
    'l-format'                => 'ff7-01-02',
    'lex'                     => 'lexile-number',
    'hi'                      => 'host-item-number',
    'itu'                     => 'index-term-uncontrolled',
    'itg'                     => 'index-term-genre',
);

# A field name, and zero or more ".subfield" suffixes following it.
my $field_name_pattern  = '[\w\-]+';
my $multi_field_pattern = "(?:\\.$field_name_pattern)*";

sub _convert_index_fields {
    my ( $self, @indexes ) = @_;

    my %index_type_convert =
      ( __default => undef, phr => 'phrase', rtrn => 'right-truncate' );

    # Convert according to our table, drop anything that doesn't convert.
    # If a field starts with mc- we save it as it's used (and removed) later
    # when joining things, to indicate we make it an 'OR' join.
    my @converted;
    foreach my $index (@indexes) {
        next unless defined $index;

        # "field,type", lower cased
        my ( $f, $t ) = map { lc } split /,/, $index;
        next unless defined $f;

        # Strip the mc- prefix for the lookup, put it back afterwards.
        my $mc = ( $f =~ s/^mc-// ) ? 'mc-' : '';

        my $field =
          exists $index_field_convert{$f} ? $index_field_convert{$f} : $f;
        next unless $field;

        push @converted,
          {
            field => $mc . $field,
            type  => $index_type_convert{ $t // '__default' },
          };
    }
    return @converted;
}
=head2 _convert_index_strings

    my @searches = $self->_convert_index_strings(@searches);

Similar to L<_convert_index_fields>, this takes strings of the form
B<field:search term> and rewrites the field from zebra-style to
elasticsearch-style. Anything it doesn't understand is returned verbatim.
Empty strings are dropped.

=cut

sub _convert_index_strings {
    my ( $self, @searches ) = @_;

    my @converted;
  SEARCH:
    foreach my $search (@searches) {
        next SEARCH if $search eq '';

        # Split "field[,type]:term"; the field part may be empty.
        my ( $field, $term ) = $search =~ /^\s*([\w,-]*?):(.*)/;

        my $index;
        ($index) = $self->_convert_index_fields($field)
          if defined($field) && defined($term);

        # No "field:" prefix, or a field that doesn't convert: keep as is.
        if ( !defined $index ) {
            push @converted, $search;
            next SEARCH;
        }

        my $operand =
          $self->_modify_string_by_type( %$index, operand => $term );
        push @converted, $index->{field} . ":" . $operand;
    }
    return @converted;
}
=head2 _convert_index_strings_freeform

    my $search = $self->_convert_index_strings_freeform($search);

This is similar to L<_convert_index_strings>, however it'll search out the
things to change within the string. So it can handle strings such as
C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.

If there is something of the form "su,complete-subfield" or something, the
second part is stripped off as we can't yet handle that. Making it work
will have to wait for a real query parser.

=cut

sub _convert_index_strings_freeform {
    my ( $self, $search ) = @_;

    # @TODO: Currently this will also alter fields contained within quotes:
    # `searching for "stuff cn:123"` for example will become
    # `searching for "stuff local-number:123"`
    #
    # Fixing this is tricky, one possibility:
    # https://stackoverflow.com/questions/19193876/perl-regex-to-match-a-string-that-is-not-enclosed-in-quotes
    # Still not perfect, and will not handle escaped quotes within quotes and assumes balanced quotes.
    #
    # Another, not so elegant, solution could be to replace all quoted content with placeholders, and put
    # them back when processing is done.

    # Lower case field names (and drop any ",type" qualifier)
    $search =~ s/($field_name_pattern)(?:,[\w-]*)?($multi_field_pattern):/\L$1\E$2:/og;

    # Resolve possible field aliases
    $search =~ s/($field_name_pattern)($multi_field_pattern):/
        my ( $name, $subfields ) = ( $1, $2 );
        my $resolved =
          exists $index_field_convert{$name}
          ? $index_field_convert{$name}
          : $name;
        $resolved . "$subfields:";
    /oge;

    return $search;
}
=head2 _modify_string_by_type

    my $str = $self->_modify_string_by_type(%index_field);

If you have a search term (operand) and a type (phrase, right-truncated), this
will convert the string to have the function in lucene search terms, e.g.
wrapping quotes around it.

C<%index_field> should contain C<operand> and optionally C<type>
('phrase' or 'right-truncate'). An undefined or empty operand is returned
unchanged.

=cut

sub _modify_string_by_type {
    my ( $self, %idx ) = @_;

    my $type = $idx{type} || '';
    my $str  = $idx{operand};

    # Empty or undef, we can't use it. A plain truth test is not enough
    # here, as that would also skip the perfectly valid search term "0".
    return $str unless defined $str && length $str;

    $str .= '*' if $type eq 'right-truncate';
    $str = '"' . $str . '"' if $type eq 'phrase';
    return $str;
}
=head2 _join_queries

    my $query_str = $self->_join_queries(@query_parts);

This takes a list of query parts, that might be search terms on their own, or
booleaned together, or specifying fields, or whatever, wraps them in
parentheses, and ANDs them all together. Suitable for feeding to the ES
query string query.

Note: doesn't AND them together if they specify an index that starts with "mc"
as that was a special case in the original code for dealing with multiple
choice options (you can't search for something that has an itype of A and
an itype of B otherwise.) Those parts are ORed together as one group, and
the "mc-" prefix is removed.

Undefined and empty parts are ignored. A single remaining part is returned
as it is, without parentheses; no parts at all gives an empty list.

=cut

sub _join_queries {
    my ( $self, @parts ) = @_;

    # Sort the usable parts into ordinary ones and multiple choice ones.
    my ( @plain, @multi_choice );
    foreach my $part (@parts) {
        next unless defined($part) && $part ne '';
        if ( $part =~ /^mc-/ ) {
            push @multi_choice, ( $part =~ s/^mc-//r );
        }
        else {
            push @plain, $part;
        }
    }

    my $total = @plain + @multi_choice;
    return () if $total == 0;
    return ( @plain, @multi_choice )[0] if $total == 1;

    # Only add a clause when there is something to put in it, so that no
    # empty string ends up being joined.
    my @clauses;
    push @clauses, join( ' AND ', map { "($_)" } @plain ) if @plain;
    push @clauses, '(' . join( ' OR ', map { "($_)" } @multi_choice ) . ')'
      if @multi_choice;

    return join( ' AND ', @clauses );
}
=head2 _make_phrases

    my @phrased_queries = $self->_make_phrases(@query_parts);

This takes the supplied queries and turns the ones that have a field prefix,
e.g. 'subject:', into phrases by wrapping quotes around the part after the
prefix. Leading whitespace in front of the prefix is removed. Queries
without a field prefix are returned unchanged.

=cut

sub _make_phrases {
    my ( $self, @parts ) = @_;

    my @phrased;
    foreach my $part (@parts) {

        # /r leaves the caller's value alone and hands back the new string
        push @phrased, ( $part =~ s/^\s*(\w*?:)(.*)$/$1"$2"/r );
    }
    return @phrased;
}
=head2 _create_query_string

    my @query_strings = $self->_create_query_string(@queries);

Given a list of hashrefs, it will turn them into a lucene-style query string.
The hash should contain field, type (both for the indexes), operator, and
operand.

Each hashref becomes one string of the form C<OPERATOR (field:operand)>,
where the operator and the field are left out when they are not set.

=cut

sub _create_query_string {
    my ( $self, @queries ) = @_;

    my @clauses;
    foreach my $q (@queries) {
        my $prefix = $q->{operator} ? $q->{operator} . ' ' : '';
        my $field  = $q->{field}    ? $q->{field} . ':'    : '';

        # Quote or truncate the operand as its type asks for.
        my $operand = $self->_modify_string_by_type(%$q);
        push @clauses, "$prefix($field$operand)";
    }
    return @clauses;
}
=head2 _clean_search_term

    my $term = $self->_clean_search_term($term);

This cleans a search term by removing any funny characters that may upset
ES and give us an error. It also calls L<_convert_index_strings_freeform>
to ensure those parts are correct.

=cut

sub _clean_search_term {
    my ( $self, $term ) = @_;

    # Some hardcoded searches (like with authorities) produce things like
    # 'an=123', when it ought to be 'an:123' for our purposes.
    $term =~ s/=/:/g;

    $term = $self->_convert_index_strings_freeform($term);

    # Curly braces are turned into double quotes.
    $term =~ s/[{}]/"/g;

    return $term;
}
=head2 _fix_limit_special_cases

    my $limits = $self->_fix_limit_special_cases($limits);

This converts any special cases that the limit specifications have into things
that are more readily processable by the rest of the code.

The argument should be an arrayref, and it'll return a new arrayref. Year
limits become C<copydate> limits, C<available> becomes C<onloan:0>, and
everything else is passed through. A year limit that can't be parsed is
dropped.

=cut

sub _fix_limit_special_cases {
    my ( $self, $limits ) = @_;

    my @fixed;
  LIMIT:
    foreach my $limit (@$limits) {

        # This is set up by opac-search.pl
        if ( $limit =~ /^yr,st-numeric,ge=/ ) {

            # a range of years
            my ( $from, $to ) =
              ( $limit =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/ );
            next LIMIT unless defined($from) && defined($to);
            push @fixed, "copydate:[$from TO $to]";
            next LIMIT;
        }

        if ( $limit =~ /^yr,st-numeric=/ ) {

            # a single year
            my ($year) = ( $limit =~ /^yr,st-numeric=(.*)$/ );
            next LIMIT unless defined($year);
            push @fixed, "copydate:$year";
            next LIMIT;
        }

        if ( $limit =~ /^available$/ ) {
            push @fixed, 'onloan:0';
            next LIMIT;
        }

        # Nothing special about this one
        push @fixed, $limit;
    }
    return \@fixed;
}
=head2 _sort_field

    my $field = $self->_sort_field($field);

Given a field name, this works out what the actual name of the field to sort
on should be. A '__sort' suffix is added for fields with a sort version, and
for text fields either '.phrase' (for sortable versions) or '.raw' is appended
to avoid sorting on a tokenized value.

A field that is not listed in C<sort_fields> at all is treated as having a
sort version. The mappings are only read, never changed.

=cut

sub _sort_field {
    my ( $self, $f ) = @_;

    # Walk down the mappings one level at a time. Looking up
    # {data}{properties}{$f}{type} in one go would create empty entries in
    # the mappings for every field that is asked about.
    my $mappings   = $self->get_elasticsearch_mappings();
    my $data       = $mappings   ? $mappings->{data}     : undef;
    my $properties = $data       ? $data->{properties}   : undef;
    my $definition = $properties ? $properties->{$f}     : undef;
    my $type       = $definition ? $definition->{type}   : undef;
    my $is_text    = defined $type && $type eq 'text';

    my $has_sort = $self->sort_fields()->{$f};
    if ( !defined $has_sort || $has_sort ) {
        $f .= '__sort';

        # We need to add '.phrase' to text fields, otherwise it'll sort
        # based on the tokenised form.
        $f .= '.phrase' if $is_text;
    }
    else {
        # We need to add '.raw' to text fields without a sort field,
        # otherwise it'll sort based on the tokenised form.
        $f .= '.raw' if $is_text;
    }
    return $f;
}
=head2 _truncate_terms

    my $query = $self->_truncate_terms($query);

Given a string query this function appends '*' wildcard to all terms except
the boolean operators (and, or, not) and terms that end in a non-word
character, such as double quoted strings.

=cut

sub _truncate_terms {
    my ( $self, $query ) = @_;

    my %is_operator = map { $_ => 1 } qw/and or not/;

    my @terms;
    foreach my $token ( $self->_split_query($query) ) {

        # Filter out empty tokens
        next if $token =~ /^\s*$/;

        # Append '*' to words if needed, ie. if it ends in a word character
        # and is not a keyword
        if ( $token =~ /\W$/ || $is_operator{ lc $token } ) {
            push @terms, $token;
        }
        else {
            push @terms, "$token*";
        }
    }

    return join ' ', @terms;
}
=head2 _split_query

    my @token = $self->_split_query($query_str);

Given a string query this function splits it to tokens taking into account
any field prefixes and quoted strings.

=cut

# Splits on whitespace and on quoted strings (with an optional field prefix).
# The capturing group makes split keep the quoted strings as tokens.
my $tokenize_split_re = qr/((?:${field_name_pattern}${multi_field_pattern}:)?"[^"]+"|\s+)/;

sub _split_query {
    my ( $self, $query ) = @_;

    # '"donald duck" title:"the mouse" and peter' gets split into
    # ['', '"donald duck"', '', ' ', '', 'title:"the mouse"', '', ' ', 'and', ' ', 'peter']
    my @pieces = split $tokenize_split_re, $query;

    # Filter out empty values and the whitespace separators
    return grep { /\S/ } @pieces;
}