1 package Koha::SearchEngine::Elasticsearch::QueryBuilder;
3 # This file is part of Koha.
5 # Copyright 2014 Catalyst IT Ltd.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
22 Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch
23 query objects from user-supplied queries
27 This provides the functions that take a user-supplied search query and
28 turn it into something that can be given to Elasticsearch to get answers.
32 use Koha::SearchEngine::Elasticsearch::QueryBuilder;
33 $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new({ index => $index });
34 my $simple_query = $builder->build_query("hello");
35 # This is currently undocumented because the original code is undocumented
36 my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
42 use base qw(Koha::SearchEngine::Elasticsearch);
45 use List::MoreUtils qw/ each_array /;
54 my $simple_query = $builder->build_query("hello", %options)
56 This will build a query that can be issued to elasticsearch from the provided
57 string input. This expects a lucene style search form (see
58 L<http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html#query-string-syntax>
61 It'll make an attempt to respect the various query options.
63 Additional options can be provided with the C<%options> hash.
69 This should be an arrayref of hashrefs, each containing a C<field> and an
70 C<direction> (optional, defaults to C<asc>.) The results will be sorted
71 according to these values. Valid values for C<direction> are 'asc' and 'desc'.
# Body of build_query: turns the query string $query plus %options
# (fields, sort, expanded_facet) into the request structure $res.
78 my ( $self, $query, %options ) = @_;
# System preferences that can influence query construction.
# NOTE(review): of these, only $fuzzy_enabled is visibly used below;
# confirm whether the other three are still needed.
80 my $stemming = C4::Context->preference("QueryStemming") || 0;
81 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
82 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
83 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
# Fall back to the '*' wildcard when no query string was supplied.
85 $query = '*' unless defined $query;
# Fuzziness is 'auto' only when the QueryFuzzy preference is set.
91 fuzziness => $fuzzy_enabled ? 'auto' : '0',
92 default_operator => 'AND',
93 default_field => '_all',
94 lenient => JSON::true,
95 fields => $options{fields} || [],
# Translate each { field, direction } sort option into a sort clause.
# A missing direction defaults to 'asc'; any value other than 'asc' or
# 'desc' is fatal.
99 if ( $options{sort} ) {
100 foreach my $sort ( @{ $options{sort} } ) {
101 my ( $f, $d ) = @$sort{qw/ field direction /};
102 die "Invalid sort direction, $d"
103 if $d && ( $d ne 'asc' && $d ne 'desc' );
104 $d = 'asc' unless $d;
# Resolve the name of the field that is actually sorted on.
106 $f = $self->_sort_field($f);
107 push @{ $res->{sort} }, { $f => { order => $d } };
111 # See _convert_facets in Search.pm for how these get turned into
112 # things that Koha can use.
113 $res->{aggregations} = {
114 author => { terms => { field => "author__facet" } },
115 subject => { terms => { field => "subject__facet" } },
116 itype => { terms => { field => "itype__facet" } },
117 location => { terms => { field => "location__facet" } },
118 'su-geo' => { terms => { field => "su-geo__facet" } },
119 'title-series' => { terms => { field => "title-series__facet" } },
120 ccode => { terms => { field => "ccode__facet" } },
121 ln => { terms => { field => "ln__facet" } },
# The library facets are only added when selected by the
# DisplayLibraryFacets preference ('home', 'holding' or 'both').
# NOTE(review): the string comparisons below will warn if the preference
# is undefined.
124 my $display_library_facets = C4::Context->preference('DisplayLibraryFacets');
125 if ( $display_library_facets eq 'both'
126 or $display_library_facets eq 'home' ) {
127 $res->{aggregations}{homebranch} = { terms => { field => "homebranch__facet" } };
129 if ( $display_library_facets eq 'both'
130 or $display_library_facets eq 'holding' ) {
131 $res->{aggregations}{holdingbranch} = { terms => { field => "holdingbranch__facet" } };
# The expanded facet gets its terms size set from the FacetMaxCount
# preference.
133 if ( my $ef = $options{expanded_facet} ) {
134 $res->{aggregations}{$ef}{terms}{size} = C4::Context->preference('FacetMaxCount');
139 =head2 build_browse_query
141 my $browse_query = $builder->build_browse_query($field, $query);
143 This performs a "starts with" style query on a particular field. The field
144 to be searched must have been indexed with an appropriate mapping as a
145 "phrase" subfield, which pretty much everything has.
149 # XXX this isn't really a browse query like we want in the end
150 sub build_browse_query {
# Builds a prefix ("starts with") query for $query on $field, sorted
# ascending on that field.
151 my ( $self, $field, $query ) = @_;
153 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
# Without a query string there is nothing to prefix-match on.
155 return { query => '*' } if !defined $query;
157 # TODO this should come from Koha::SearchEngine::Elasticsearch
158 my %field_whitelist = (
# Any field that is not whitelisted falls back to 'title'.
162 $field = 'title' if !exists $field_whitelist{$field};
163 my $sort = $self->_sort_field($field);
166 match_phrase_prefix => {
# Fuzziness is 'auto' only when the QueryFuzzy preference is set.
170 fuzziness => $fuzzy_enabled ? 'auto' : '0',
174 sort => [ { $sort => { order => "asc" } } ],
178 =head2 build_query_compat
181 $error, $query, $simple_query, $query_cgi,
182 $query_desc, $limit, $limit_cgi, $limit_desc,
183 $stopwords_removed, $query_type
185 = $builder->build_query_compat( \@operators, \@operands, \@indexes,
186 \@limits, \@sort_by, $scan, $lang );
188 This handles a search using the same api as L<C4::Search::buildQuery> does.
190 A very simple query will go in with C<$operands> set to ['query'], and
191 C<$sort_by> set to ['pubdate_dsc']. This simple case will return with
192 C<$query> set to something that can perform the search, C<$simple_query>
193 set to just the search term, C<$query_cgi> set to something that can
194 reproduce this search, and C<$query_desc> set to a description of the search (here, the search term itself).
198 sub build_query_compat {
# Builds an Elasticsearch query from buildQuery-style arguments and
# returns it together with the CGI and description strings used to
# reproduce and display the search.
199 my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan,
203 #die Dumper ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan, $lang );
# Translate the zebra-style sort, index and limit arguments.
204 my @sort_params = $self->_convert_sort_fields(@$sort_by);
205 my @index_params = $self->_convert_index_fields(@$indexes);
206 my $limits = $self->_fix_limit_special_cases($orig_limits);
# When the suppress parameter is set, add a 'suppress:0' limit.
207 if ( $params->{suppress} ) { push @$limits, "suppress:0"; }
209 # Merge the indexes in with the search terms and the operands so that
210 # each search thing is a handy unit.
211 unshift @$operators, undef; # The first one can't have an op
213 my $truncate = C4::Context->preference("QueryAutoTruncate") || 0;
# Walk operands, operators and converted indexes in parallel.
# NOTE(review): _convert_index_fields greps out entries whose field is
# empty, so @index_params can be shorter than @$operands and the pairing
# below may then be off by one or more positions -- confirm.
214 my $ea = each_array( @$operands, @$operators, @index_params );
215 while ( my ( $oand, $otor, $index ) = $ea->() ) {
# Operands that are undefined or empty contribute nothing.
216 next if ( !defined($oand) || $oand eq '' );
217 $oand = $self->_clean_search_term($oand);
218 $oand = $self->_truncate_terms($oand) if ($truncate);
219 push @search_params, {
220 operand => $oand, # the search terms
221 operator => defined($otor) ? uc $otor : undef, # AND and so on
222 $index ? %$index : (),
226 # We build a string query from limits and the queries. An alternative
227 # would be to pass them separately into build_query and let it build
228 # them into a structured ES query itself. Maybe later, though that'd be
230 my $search_param_query_str = join( ' ', $self->_create_query_string(@search_params) );
231 my $query_str = join( ' AND ',
232 $search_param_query_str || (),
233 $self->_join_queries( $self->_convert_index_strings(@$limits) ) || () );
# When weighted fields are requested, each one is added as "name^weight".
236 if ( defined($params->{weighted_fields}) && $params->{weighted_fields} ) {
237 push @fields, sprintf("%s^%s", $_->name, $_->weight) for Koha::SearchFields->weighted_fields;
240 # If there's no query on the left, let's remove the junk left behind
241 $query_str =~ s/^ AND //;
243 $options{fields} = \@fields;
244 $options{sort} = \@sort_params;
245 $options{expanded_facet} = $params->{expanded_facet};
246 my $query = $self->build_query( $query_str, %options );
248 # We roughly emulate the CGI parameters of the zebra query builder
250 shift @$operators; # Shift out the one we unshifted before
# The CGI string is built from the original, unconverted index names.
251 $ea = each_array( @$operands, @$operators, @$indexes );
252 while ( my ( $oand, $otor, $index ) = $ea->() ) {
253 $query_cgi .= '&' if $query_cgi;
254 $query_cgi .= 'idx=' . uri_escape_utf8( $index // '') . '&q=' . uri_escape_utf8( $oand );
255 $query_cgi .= '&op=' . uri_escape_utf8( $otor ) if $otor;
257 $query_cgi .= '&scan=1' if ( $scan );
# A lone operand is also reported back as the simple query.
260 $simple_query = $operands->[0] if @$operands == 1;
# The description is the simple query when there is one, otherwise the
# query string generated from the search parameters.
262 if ( $simple_query ) {
263 $query_desc = $simple_query;
265 $query_desc = $search_param_query_str;
267 my $limit = $self->_join_queries( $self->_convert_index_strings(@$limits));
# The limit CGI string is built from the limits as originally passed in.
268 my $limit_cgi = ( $orig_limits and @$orig_limits )
269 ? '&limit=' . join( '&limit=', map { uri_escape_utf8($_) } @$orig_limits )
272 $limit_desc = "$limit" if $limit;
# Returned in the documented order; the $error, $stopwords_removed and
# $query_type slots are always undef.
275 undef, $query, $simple_query, $query_cgi, $query_desc,
276 $limit, $limit_cgi, $limit_desc, undef, undef
280 =head2 build_authorities_query
282 my $query = $builder->build_authorities_query(\%search);
284 This takes a nice description of an authority search and turns it into a black-box
285 query that can then be passed to the appropriate searcher.
287 The search description is a hashref that looks something like:
292 where => 'Heading', # search the main entry
293 operator => 'exact', # require an exact match
294 value => 'frogs', # the search string
297 where => '', # search all entries
298 operator => '', # default keyword, right truncation
306 authtypecode => 'TOPIC_TERM',
311 sub build_authorities_query {
# Turns the search description hashref $search (keys read here: searches,
# sort) into a query in which every search part must match.
312 my ( $self, $search ) = @_;
314 # Start by making the query parts
317 foreach my $s ( @{ $search->{searches} } ) {
318 my ( $wh, $op, $val ) = @{$s}{qw(where operator value)};
# An empty 'where' means: search all fields.
# NOTE(review): an undefined 'where' or 'operator' triggers warnings in
# the string comparisons below.
319 $wh = '_all' if $wh eq '';
320 if ( $op eq 'is' || $op eq '=' || $op eq 'exact' ) {
322 # look for something that matches a term completely
323 # note, '=' is about numerical vals. May need special handling.
324 # Also, we lowercase our search because the ES
325 # index lowercases its values, and term searches don't get the
326 # search analyzer applied to them.
327 push @query_parts, { match_phrase => {"$wh.phrase" => lc $val} };
329 elsif ( $op eq 'start' ) {
330 # startswith search, uses lowercase untokenized version of heading
331 push @query_parts, { match_phrase_prefix => {"$wh.phrase" => lc $val} };
334 # regular wordlist stuff
# Each token is cleaned and truncated, then the tokens are combined with
# _join_queries into a single query_string on $wh.
335 my @tokens = $self->_split_query( $val );
336 foreach my $token ( @tokens ) {
337 $token = $self->_truncate_terms(
338 $self->_clean_search_term( $token )
341 my $query = $self->_join_queries( @tokens );
342 push @query_parts, { query_string => { default_field => $wh, query => $query } };
346 # Merge the query parts appropriately
347 # 'should' behaves like 'or'
348 # 'must' behaves like 'and'
349 # Zebra results seem to match must so using that here
350 my $query = { query =>
352 { must => \@query_parts }
# Rewrite the sort keys to the names that _sort_field says to sort on.
# Note that this replaces $search->{sort} in the caller's hashref.
357 if ( exists $search->{sort} ) {
358 foreach my $k ( keys %{ $search->{sort} } ) {
359 my $f = $self->_sort_field($k);
360 $s{$f} = $search->{sort}{$k};
362 $search->{sort} = \%s;
366 $query->{sort} = [ $search->{sort} ] if exists $search->{sort};
372 =head2 build_authorities_query_compat
375 $builder->build_authorities_query_compat( \@marclist, \@and_or,
376 \@excluding, \@operator, \@value, $authtypecode, $orderby );
378 This builds a query for searching for authorities, in the style of
379 L<C4::AuthoritiesMarc::SearchAuthorities>.
387 An arrayref containing where the particular term should be searched for.
388 Options are: mainmainentry, mainentry, match, match-heading, see-from, and
389 thesaurus. If left blank, any field is used.
393 Totally ignored. It is never used in L<C4::AuthoritiesMarc::SearchAuthorities>.
401 What form of search to do. Options are: is (phrase, no truncation, whole field
402 must match), = (number exact match), exact (phrase, no truncation, whole field
403 must match). If left blank, then word list, right truncated, anywhere is used.
407 The actual user-provided string value to search for.
411 The authority type code to search within. If blank, then all will be searched.
415 The order to sort the results by. Options are Relevance, HeadingAsc,
416 HeadingDsc, AuthidAsc, AuthidDsc.
420 marclist, operator, and value must be the same length, and the values at
421 index /i/ all relate to each other.
423 This returns a query, which is a black box object that can be passed to the
424 appropriate search object.
428 our $koha_to_index_name = {
# Maps the (lower-cased) marclist names accepted by
# build_authorities_query_compat to the index name used as 'where' in the
# search description. Names missing from this table are rejected there.
429 mainmainentry => 'heading-main',
430 mainentry => 'heading',
432 'match-heading' => 'match-heading',
433 'see-from' => 'match-heading-see-from',
434 thesaurus => 'subject-heading-thesaurus',
439 sub build_authorities_query_compat {
# Converts the positional, SearchAuthorities-style arguments into the
# hash form and delegates to build_authorities_query.
# Throws Koha::Exceptions::WrongParameter for an unknown marclist name.
440 my ( $self, $marclist, $and_or, $excluding, $operator, $value,
441 $authtypecode, $orderby )
444 # This turns the old-style many-options argument form into a more
445 # extensible hash form that is understood by L<build_authorities_query>.
448 # Convert to lower case
449 $marclist = [map(lc, @{$marclist})];
# NOTE(review): warns if $orderby is undefined.
450 $orderby = lc $orderby;
452 # Make sure everything exists
453 foreach my $m (@$marclist) {
454 Koha::Exceptions::WrongParameter->throw("Invalid marclist field provided: $m")
455 unless exists $koha_to_index_name->{$m};
# One search entry per value; marclist and operator are read at the same
# index. The test below is for truthiness, so a value of '0' is skipped
# as well.
457 for ( my $i = 0 ; $i < @$value ; $i++ ) {
458 next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches
461 where => $koha_to_index_name->{$marclist->[$i]},
462 operator => $operator->[$i],
463 value => $value->[$i],
# The sort field follows the prefix of $orderby: 'heading...' sorts on
# heading, 'auth...' on local-number.
469 ( $orderby =~ /^heading/ ) ? 'heading'
470 : ( $orderby =~ /^auth/ ) ? 'local-number'
# Anything that does not end in 'asc' sorts descending.
473 my $sort_order = ( $orderby =~ /asc$/ ) ? 'asc' : 'desc';
474 %sort = ( $sort_field => $sort_order, );
477 searches => \@searches,
478 authtypecode => $authtypecode,
# A sort entry is only passed on when one was set above.
480 $search{sort} = \%sort if %sort;
481 my $query = $self->build_authorities_query( \%search );
485 =head2 _convert_sort_fields
487 my @sort_params = _convert_sort_fields(@sort_by)
489 Converts the zebra-style sort index information into elasticsearch-style.
491 C<@sort_by> is the same as presented to L<build_query_compat>, and it returns
492 something that can be sent to L<build_query>.
496 sub _convert_sort_fields {
# Converts zebra-style '<field>_<order>' sort strings into a list of
# { field, direction } hashrefs.
497 my ( $self, @sort_by ) = @_;
499 # Turn the sorting into something we care about.
500 my %sort_field_convert = (
501 acqdate => 'date-of-acquisition',
503 call_number => 'local-classification',
504 popularity => 'issues',
505 relevance => undef, # default
507 pubdate => 'date-of-publication',
# Accepted order suffixes and the direction each one stands for.
509 my %sort_order_convert =
510 ( qw( desc desc ), qw( dsc desc ), qw( asc asc ), qw( az asc ), qw( za desc ) );
512 # Convert the fields and orders, drop anything we don't know about.
# The first capture group is greedy, so the split is at the last
# underscore ('call_number_asc' gives 'call_number' and 'asc'). Entries
# whose field converts to a false value (unknown fields, or relevance,
# which maps to undef) are removed by the grep.
513 grep { $_->{field} } map {
514 my ( $f, $d ) = /(.+)_(.+)/;
516 field => $sort_field_convert{$f},
517 direction => $sort_order_convert{$d}
522 =head2 _convert_index_fields
524 my @index_params = $self->_convert_index_fields(@indexes);
526 Converts zebra-style search index notation into elasticsearch-style.
528 C<@indexes> is an array of index names, as presented to L<build_query_compat>,
529 and it returns something that can be sent to L<build_query>.
531 B<TODO>: this will pull from the elasticsearch mappings table to figure out
536 our %index_field_convert = (
# Maps zebra-style index names and aliases to the field names used in
# Elasticsearch. Lookups are single-pass (see _convert_index_fields and
# _convert_index_strings_freeform), so every value must already be the
# final field name and must never be another alias from this table.
540 'lcn' => 'local-classification',
541 'callnum' => 'local-classification',
542 'record-type' => 'rtype',
543 'mc-rtype' => 'rtype',
545 'lc-card' => 'lc-card-number',
546 'sn' => 'local-number',
547 'yr' => 'date-of-publication',
548 'pubdate' => 'date-of-publication',
549 'acqdate' => 'date-of-acquisition',
550 'date/time-last-modified' => 'date-time-last-modified',
# 'dtlm' must point at the final field name, not at the
# 'date/time-last-modified' alias, which would never be resolved further.
551 'dtlm' => 'date-time-last-modified',
552 'diss' => 'dissertation-information',
555 'music-number' => 'identifier-publisher-for-music',
556 'number-music-publisher' => 'identifier-publisher-for-music',
557 'music' => 'identifier-publisher-for-music',
558 'ident' => 'identifier-standard',
559 'cpn' => 'corporate-name',
560 'cfn' => 'conference-name',
561 'pn' => 'personal-name',
566 'rcn' => 'record-control-number',
568 'su-to' => 'subject',
569 #'su-geo' => 'subject',
570 'su-ut' => 'subject',
572 'se' => 'title-series',
573 'ut' => 'title-uniform',
574 'an' => 'koha-auth-number',
575 'authority-number' => 'koha-auth-number',
578 'rank' => 'relevance',
579 'phr' => 'st-phrase',
580 'wrdl' => 'st-word-list',
581 'rt' => 'right-truncation',
582 'rtrn' => 'right-truncation',
583 'ltrn' => 'left-truncation',
584 'rltrn' => 'left-and-right',
585 'mc-itemtype' => 'itemtype',
586 'mc-ccode' => 'ccode',
587 'branch' => 'homebranch',
588 'mc-loc' => 'location',
589 'stocknumber' => 'number-local-acquisition',
590 'inv' => 'number-local-acquisition',
592 'mc-itype' => 'itype',
593 'aub' => 'author-personal-bibliography',
594 'auo' => 'author-in-order',
598 'frequency-code' => 'ff8-18',
599 'illustration-code' => 'ff8-18-21',
600 'regularity-code' => 'ff8-19',
601 'type-of-serial' => 'ff8-21',
602 'format' => 'ff8-23',
603 'conference-code' => 'ff8-29',
604 'festschrift-indicator' => 'ff8-30',
605 'index-indicator' => 'ff8-31',
608 'literature-code' => 'lf',
609 'biography' => 'bio',
611 'biography-code' => 'bio',
612 'l-format' => 'ff7-01-02',
613 'lex' => 'lexile-number',
614 'hi' => 'host-item-number',
615 'itu' => 'index-term-uncontrolled',
616 'itg' => 'index-term-genre',
# Regex fragment for a field name: word characters and hyphens.
618 my $field_name_pattern = '[\w\-]+';
# Regex fragment for zero or more '.name' suffixes following a field name
# (such as the '.phrase' in 'heading.phrase').
619 my $multi_field_pattern = "(?:\\.$field_name_pattern)*";
621 sub _convert_index_fields {
# Converts zebra-style index names, each given as 'field' or 'field,type',
# into a list of hashrefs with 'field' and 'type' keys.
622 my ( $self, @indexes ) = @_;
# Only 'phr' and 'rtrn' are recognised as types; any other type, or none
# at all, yields an undef type.
624 my %index_type_convert =
625 ( __default => undef, phr => 'phrase', rtrn => 'right-truncate' );
627 # Convert according to our table, drop anything that doesn't convert.
628 # If a field starts with mc- we save it as it's used (and removed) later
629 # when joining things, to indicate we make it an 'OR' join.
630 # (Sorry, this got a bit ugly after special cases were found.)
# Entries whose resulting field is false (for example an empty index
# name) are removed by the grep, so the returned list can be shorter than
# @indexes.
631 grep { $_->{field} } map {
632 # Lower case all field names
633 my ( $f, $t ) = map(lc, split /,/);
# Field names without an entry in %index_field_convert pass through
# unchanged.
640 field => exists $index_field_convert{$f} ? $index_field_convert{$f} : $f,
641 type => $index_type_convert{ $t // '__default' }
# Put the saved prefix $mc back in front of the converted field name.
# NOTE(review): $mc is set on lines not shown here; presumably it holds
# the 'mc-' prefix described above -- confirm.
643 $r->{field} = ($mc . $r->{field}) if $mc && $r->{field};
648 =head2 _convert_index_strings
650 my @searches = $self->_convert_index_strings(@searches);
652 Similar to L<_convert_index_fields>, this takes strings of the form
653 B<field:search term> and rewrites the field from zebra-style to
654 elasticsearch-style. Anything it doesn't understand is returned verbatim.
658 sub _convert_index_strings {
# Rewrites each 'field[,type]:term' string so that it uses the converted
# field name and a term adjusted for the index type.
659 my ( $self, @searches ) = @_;
661 foreach my $s (@searches) {
# Split at the first colon: everything before it (word characters, commas
# and hyphens only) is the index, the rest is the term.
663 my ( $field, $term ) = $s =~ /^\s*([\w,-]*?):(.*)/;
# No 'field:term' shape found (per the POD, such strings are returned
# verbatim).
664 unless ( defined($field) && defined($term) ) {
668 my ($conv) = $self->_convert_index_fields($field);
# The index did not convert to anything usable.
669 unless ( defined($conv) ) {
673 push @res, $conv->{field} . ":"
674 . $self->_modify_string_by_type( %$conv, operand => $term );
679 =head2 _convert_index_strings_freeform
681 my $search = $self->_convert_index_strings_freeform($search);
683 This is similar to L<_convert_index_strings>, however it'll search out the
684 things to change within the string. So it can handle strings such as
685 C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
687 If there is something of the form "su,complete-subfield" or something, the
688 second part is stripped off as we can't yet handle that. Making it work
689 will have to wait for a real query parser.
693 sub _convert_index_strings_freeform {
# Rewrites every 'field:' prefix found anywhere inside $search to its
# Elasticsearch field name.
694 my ( $self, $search ) = @_;
695 # @TODO: Currently this will also alter fields contained within quotes:
696 # `searching for "stuff cn:123"` for example will become
697 # `searching for "stuff local-number:123"
699 # Fixing this is tricky, one possibility:
700 # https://stackoverflow.com/questions/19193876/perl-regex-to-match-a-string-that-is-not-enclosed-in-quotes
701 # Still not perfect, and will not handle escaped quotes within quotes and assumes balanced quotes.
703 # Another, not so elegant, solution could be to replace all quoted content with placeholders, and put
704 # them back when processing is done.
706 # Lower case field names
# This substitution also drops any ',type' modifier after the field name;
# '.subfield' suffixes are kept as they are.
707 $search =~ s/($field_name_pattern)(?:,[\w-]*)?($multi_field_pattern):/\L$1\E$2:/og;
708 # Resolve possible field aliases
# Names without an entry in %index_field_convert are left unchanged.
709 $search =~ s/($field_name_pattern)($multi_field_pattern):/(exists $index_field_convert{$1} ? $index_field_convert{$1} : $1)."$2:"/oge;
713 =head2 _modify_string_by_type
715 my $str = $self->_modify_string_by_type(%index_field);
717 If you have a search term (operand) and a type (phrase, right-truncated), this
718 will convert the string to have the function in lucene search terms, e.g.
719 wrapping quotes around it.
723 sub _modify_string_by_type {
# Adjusts the 'operand' string according to the index 'type':
# 'right-truncate' appends '*', 'phrase' wraps it in double quotes, and
# any other type leaves it as it is.
724 my ( $self, %idx ) = @_;
726 my $type = $idx{type} || '';
727 my $str = $idx{operand};
# Any false operand is returned untouched; that includes the string '0'.
728 return $str unless $str; # Empty or undef, we can't use it.
730 $str .= '*' if $type eq 'right-truncate';
731 $str = '"' . $str . '"' if $type eq 'phrase';
737 my $query_str = $self->_join_queries(@query_parts);
739 This takes a list of query parts, that might be search terms on their own, or
740 booleaned together, or specifying fields, or whatever, wraps them in
741 parentheses, and ANDs them all together. Suitable for feeding to the ES
744 Note: doesn't AND them together if they specify an index that starts with "mc"
745 as that was a special case in the original code for dealing with multiple
746 choice options (you can't search for something that has an itype of A and
747 an itype of B otherwise.)
# Body of _join_queries: combines @parts into one query string. Parts
# that start with 'mc-' are ORed together, all other parts are ANDed.
752 my ( $self, @parts ) = @_;
# Undefined and empty parts are ignored in both groups.
754 my @norm_parts = grep { defined($_) && $_ ne '' && $_ !~ /^mc-/ } @parts;
# The 'mc-' marker is stripped from the multiple-choice parts.
756 map { s/^mc-//r } grep { defined($_) && $_ ne '' && $_ =~ /^mc-/ } @parts;
# Nothing usable: return the empty list. A single usable part is returned
# as it is, without added parentheses.
757 return () unless @norm_parts + @mc_parts;
758 return ( @norm_parts, @mc_parts )[0] if @norm_parts + @mc_parts == 1;
# Each multiple-choice part is parenthesised, the parts are joined with
# OR, and the whole group is parenthesised again.
760 @mc_parts ? '(' . ( join ' OR ', map { "($_)" } @mc_parts ) . ')' : ();
762 # Handy trick: $x || () inside a join means that if $x ends up as an
763 # empty string, it gets replaced with (), which makes join ignore it.
764 # (bad effect: this'll also happen to '0', this hopefully doesn't matter
767 join( ' AND ', map { "($_)" } @norm_parts ) || (),
773 my @phrased_queries = $self->_make_phrases(@query_parts);
775 This takes the supplied queries and forces them to be phrases by wrapping
776 quotes around them. It understands field prefixes, e.g. 'subject:' and puts
777 the quotes outside of them if they're there.
# Body of _make_phrases: returns a modified copy of each part; @parts
# itself is not changed.
782 my ( $self, @parts ) = @_;
# For a part with a 'field:' prefix, the text after the prefix is wrapped
# in double quotes and leading whitespace is dropped.
# NOTE(review): a part without such a prefix does not match and comes
# back unquoted, which differs from what the POD describes -- confirm
# whether that is intended.
783 map { s/^\s*(\w*?:)(.*)$/$1"$2"/r } @parts;
786 =head2 _create_query_string
788 my @query_strings = $self->_create_query_string(@queries);
790 Given a list of hashrefs, it will turn them into a lucene-style query string.
791 The hash should contain field, type (both for the indexes), operator, and
796 sub _create_query_string {
# Produces one '[OPERATOR ](field:operand)' string per query hashref.
797 my ( $self, @queries ) = @_;
# The operator (if any) goes in front followed by a space; the field (if
# any) is followed by a colon.
800 my $otor = $_->{operator} ? $_->{operator} . ' ' : '';
801 my $field = $_->{field} ? $_->{field} . ':' : '';
803 my $oand = $self->_modify_string_by_type(%$_);
# With a field prefix, an operand of more than one word is parenthesised
# so that the prefix applies to all of its words.
804 $oand = "($oand)" if $field && scalar(split(/\s+/, $oand)) > 1;
805 "$otor($field$oand)";
809 =head2 _clean_search_term
811 my $term = $self->_clean_search_term($term);
813 This cleans a search term by removing any funny characters that may upset
814 ES and give us an error. It also calls L<_convert_index_strings_freeform>
815 to ensure those parts are correct.
819 sub _clean_search_term {
# Cleans up the search term $term and converts the index names used in it.
820 my ( $self, $term ) = @_;
822 # Lookahead for checking if we are inside quotes
# It only succeeds where the rest of the string holds an even number of
# double quotes, i.e. at positions outside a quoted section.
823 my $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
825 # Some hardcoded searches (like with authorities) produce things like
826 # 'an=123', when it ought to be 'an:123' for our purposes.
829 $term = $self->_convert_index_strings_freeform($term);
832 # Remove unbalanced quotes
# tr/// returns the number of quotes it replaced; it runs on a copy, so
# $term itself is not modified by the count.
833 my $unquoted = $term;
834 my $count = ($unquoted =~ tr/"/ /);
835 if ($count % 2 == 1) {
839 # Remove unquoted colons that have whitespace on either side of them
840 $term =~ s/(\:[:\s]+|[:\s]+:)$lookahead//g;
845 =head2 _fix_limit_special_cases
847 my $limits = $self->_fix_limit_special_cases($limits);
849 This converts any special cases that the limit specifications have into things
850 that are more readily processable by the rest of the code.
852 The argument should be an arrayref, and it'll return an arrayref.
856 sub _fix_limit_special_cases {
# Rewrites the special-case limit strings in the arrayref $limits into
# 'field:value' form, collecting the results in @new_lim.
857 my ( $self, $limits ) = @_;
860 foreach my $l (@$limits) {
862 # This is set up by opac-search.pl
863 if ( $l =~ /^yr,st-numeric,ge=/ ) {
# Year range, 'ge=<start> and ...le=<end>': becomes a copydate range.
# A limit that lacks either bound is dropped.
864 my ( $start, $end ) =
865 ( $l =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/ );
866 next unless defined($start) && defined($end);
867 push @new_lim, "copydate:[$start TO $end]";
869 elsif ( $l =~ /^yr,st-numeric=/ ) {
# Single year: becomes a plain copydate match.
870 my ($date) = ( $l =~ /^yr,st-numeric=(.*)$/ );
871 next unless defined($date);
872 push @new_lim, "copydate:$date";
874 elsif ( $l =~ /^available$/ ) {
# 'available' is expressed as the limit 'onloan:0'.
875 push @new_lim, 'onloan:0';
886 my $field = $self->_sort_field($field);
888 Given a field name, this works out what the actual name of the field to sort
889 on should be. A '__sort' suffix is added for fields with a sort version, and
890 for text fields either '.phrase' (for sortable versions) or '.raw' is appended
891 to avoid sorting on a tokenized value.
# Body of _sort_field: works out the field name to sort on for $f.
# NOTE(review): $f is unpacked on a line not shown here; presumably it is
# the field name argument -- confirm.
898 my $mappings = $self->get_elasticsearch_mappings();
# True when the mappings declare this field with type 'text'.
899 my $textField = defined $mappings->{data}{properties}{$f}{type} && $mappings->{data}{properties}{$f}{type} eq 'text';
# This branch is taken when sort_fields() has no entry for the field, or
# a true one.
900 if (!defined $self->sort_fields()->{$f} || $self->sort_fields()->{$f}) {
902 # We need to add '.phrase' to text fields, otherwise it'll sort
903 # based on the tokenised form.
904 $f .= '.phrase' if $textField;
906 # We need to add '.raw' to text fields without a sort field,
907 # otherwise it'll sort based on the tokenised form.
908 $f .= '.raw' if $textField;
913 =head2 _truncate_terms
915 my $query = $self->_truncate_terms($query);
917 Given a string query this function appends the '*' wildcard to all terms except
918 the boolean operators (and, or, not) and double quoted strings.
922 sub _truncate_terms {
# Appends '*' to the plain words of $query and returns the tokens joined
# with single spaces.
923 my ( $self, $query ) = @_;
925 my @tokens = $self->_split_query( $query );
927 # Filter out empty tokens
928 my @words = grep { $_ !~ /^\s*$/ } @tokens;
930 # Append '*' to words if needed, ie. if it ends in a word character and is not a keyword
# Tokens ending in a non-word character (such as a closing quote) and the
# keywords and/or/not, compared case-insensitively, are kept as they are.
933 (/\W$/ or grep {lc($w) eq $_} qw/and or not/) ? $_ : "$_*";
936 return join ' ', @terms;
941 my @token = $self->_split_query($query_str);
943 Given a string query this function splits it to tokens taking into account
944 any field prefixes and quoted strings.
948 my $tokenize_split_re = qr/((?:${field_name_pattern}${multi_field_pattern}:)?"[^"]+"|\s+)/;
# The pattern above matches either a double-quoted string with an
# optional 'field:' prefix, or a run of whitespace. It is captured, so
# split() returns the matched separators as well as the text between them.
951 my ( $self, $query ) = @_;
953 # '"donald duck" title:"the mouse" and peter' get split into
954 # ['', '"donald duck"', '', ' ', '', 'title:"the mouse"', '', ' ', 'and', ' ', 'peter']
955 my @tokens = split $tokenize_split_re, $query;
957 # Filter out empty values
# Only tokens containing at least one non-whitespace character are kept.
958 @tokens = grep( /\S/, @tokens );