Browse Source

Bug 12478: allow more granular sorting configuration

This allows sorting to be configured within a field. For example, while
many values are included for search on author, sorting should only be
done on the main entry values. This permits that by having a sort value,
which can be true, false, or null. true and null are pretty much the
same, but false means that a field isn't available for sorting on. By
default (null), fields can be sorted on.

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
Signed-off-by: Jesse Weaver <jweaver@bywatersolutions.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>

Signed-off-by: Brendan Gallagher <brendan@bywatersolutions.com>
16.05.x
Robin Sheat 9 years ago
committed by Brendan Gallagher
parent
commit
2e79ea0f74
  1. 72
      Koha/ElasticSearch.pm
  2. 6
      Koha/Schema/Result/SearchField.pm
  3. 23
      Koha/Schema/Result/SearchMarcMap.pm
  4. 33
      Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
  5. 8
      Koha/SearchEngine/Elasticsearch/Search.pm
  6. 2
      Koha/SearchEngine/QueryBuilder.pm
  7. 2
      acqui/neworderbiblio.pl
  8. 6
      authorities/authorities-home.pl
  9. 6
      catalogue/search.pl
  10. 6
      cataloguing/addbooks.pl
  11. 22
      installer/data/mysql/elasticsearch_mapping.sql
  12. 6
      opac/opac-authorities-home.pl
  13. 2
      opac/opac-search.pl

72
Koha/ElasticSearch.pm

@ -20,14 +20,18 @@ package Koha::ElasticSearch;
use base qw(Class::Accessor);
use C4::Context;
use Carp;
use Koha::Database;
use Carp;
use JSON;
use Modern::Perl;
use Readonly;
use Data::Dumper; # TODO remove
__PACKAGE__->mk_ro_accessors(qw( index ));
__PACKAGE__->mk_accessors(qw( sort_fields ));
# Constants to refer to the standard index names
Readonly our $BIBLIOS_INDEX => 'biblios';
@ -158,12 +162,13 @@ created.
sub get_elasticsearch_mappings {
my ($self) = @_;
# TODO cache in the object?
my $mappings = {
data => {
properties => {
record => {
store => "yes",
include_in_all => "false",
include_in_all => JSON::false,
type => "string",
},
'_all.phrase' => {
@ -174,10 +179,11 @@ sub get_elasticsearch_mappings {
}
}
};
my %sort_fields;
my $marcflavour = lc C4::Context->preference('marcflavour');
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $marc_type ) = @_;
my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
return if $marc_type ne $marcflavour;
# TODO if this gets any sort of complexity to it, it should
# be broken out into its own function.
@ -220,11 +226,47 @@ sub get_elasticsearch_mappings {
search_analyzer => 'simple',
};
}
# Sort may be true, false, or undef. Here we care if it's
# anything other than undef.
if (defined $sort) {
$mappings->{data}{properties}{ $name . '__sort' } = {
search_analyzer => "analyser_phrase",
index_analyzer => "analyser_phrase",
type => "string",
include_in_all => JSON::false,
fields => {
phrase => {
search_analyzer => "analyser_phrase",
index_analyzer => "analyser_phrase",
type => "string",
},
},
};
$sort_fields{$name} = 1;
}
}
);
$self->sort_fields(\%sort_fields);
return $mappings;
}
# This overrides the accessor provided by Class::Accessor so that if
# sort_fields isn't set, then it'll generate it.
sub sort_fields {
my $self = shift;
if (@_) {
$self->_sort_fields_accessor(@_);
return;
}
my $val = $self->_sort_fields_accessor();
return $val if $val;
# This will populate the accessor as a side effect
$self->get_elasticsearch_mappings();
return $self->_sort_fields_accessor();
}
# Provides the rules for data conversion.
sub get_fixer_rules {
my ($self) = @_;
@ -233,7 +275,7 @@ sub get_fixer_rules {
my @rules;
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $marc_type, $marc_field ) = @_;
my ( $name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field ) = @_;
return if $marc_type ne $marcflavour;
my $options = '';
@ -260,6 +302,15 @@ sub get_fixer_rules {
if ($type eq 'sum' ) {
push @rules, "sum('$name')";
}
# Sort is a bit special as it can be true, false, or undef. For
# fixer rules, we care about "true", or about "undef" when another
# mapping has turned on special sort handling for this field. "undef"
# means to do the default thing, which is to make it sortable.
if ($self->sort_fields()->{$name}) {
if ($sort || !defined $sort) {
push @rules, "marc_map('$marc_field','${name}__sort', $options)";
}
}
}
);
return \@rules;
@ -269,7 +320,9 @@ sub get_fixer_rules {
$self->_foreach_mapping(
sub {
my ( $name, $type, $facet, $suggestible, $marc_type, $marc_field ) = @_;
my ( $name, $type, $facet, $suggestible, $sort, $marc_type,
$marc_field )
= @_;
return unless $marc_type eq 'marc21';
print "Data comes from: " . $marc_field . "\n";
}
@ -296,6 +349,13 @@ The type for this value, e.g. 'string'.
True if this value should be facetised. This only really makes sense if the
field is understood by the facet processing code anyway.
=item C<$sort>
True if this is a field that a) needs special sort handling, and b) if it
should be sorted on. False if a) but not b). Undef if not a). This allows,
for example, author to be sorted on but not everything marked with "author"
to be included in that sort.
=item C<$marc_type>
A string that indicates the MARC type that this mapping is for, e.g. 'marc21',
@ -325,12 +385,14 @@ sub _foreach_mapping {
my $facet = $row->facet;
my $suggestible = $row->suggestible;
my $search_field = $row->search_fields();
my $sort = $row->sort();
for my $sf ( $search_field->all ) {
$sub->(
$sf->name,
$sf->type,
$facet,
$suggestible,
$sort,
$marc_type,
$marc_field,
);

6
Koha/Schema/Result/SearchField.pm

@ -43,7 +43,7 @@ the name of the field as it will be stored in the search engine
extra: {list => ["string","date","number","boolean","sum"]}
is_nullable: 0
what type of data this holds, relevant when storing it
what type of data this holds, relevant when storing it in the search engine
=cut
@ -114,8 +114,8 @@ Composing rels: L</search_marc_to_fields> -> search_marc_map
__PACKAGE__->many_to_many("search_marc_maps", "search_marc_to_fields", "search_marc_map");
# Created by DBIx::Class::Schema::Loader v0.07042 @ 2015-06-10 14:32:07
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:1pj98qkKkP9g0hJYExud0A
# Created by DBIx::Class::Schema::Loader v0.07042 @ 2015-09-01 16:56:47
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:VN1BPJJTnr7p+I2bRZoBEA
# You can replace this text with custom code or comments, and it will be preserved on regeneration

23
Koha/Schema/Result/SearchMarcMap.pm

@ -61,6 +61,21 @@ the MARC specifier for this field
true if a facet field should be generated for this
=head2 suggestible
data_type: 'tinyint'
default_value: 0
is_nullable: 1
true if this field can be used to generate suggestions for browse
=head2 sort
data_type: 'tinyint'
is_nullable: 1
true/false creates special sort handling, null doesn't
=cut
__PACKAGE__->add_columns(
@ -82,6 +97,10 @@ __PACKAGE__->add_columns(
{ data_type => "varchar", is_nullable => 0, size => 255 },
"facet",
{ data_type => "tinyint", default_value => 0, is_nullable => 1 },
"suggestible",
{ data_type => "tinyint", default_value => 0, is_nullable => 1 },
"sort",
{ data_type => "tinyint", is_nullable => 1 },
);
=head1 PRIMARY KEY
@ -142,8 +161,8 @@ Composing rels: L</search_marc_to_fields> -> search_field
__PACKAGE__->many_to_many("search_fields", "search_marc_to_fields", "search_field");
# Created by DBIx::Class::Schema::Loader v0.07042 @ 2015-06-10 14:32:07
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:9Xz04ajKUJXxwJ5pdo+cUQ
# Created by DBIx::Class::Schema::Loader v0.07042 @ 2015-09-01 16:56:47
# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:Vo1uboO+iKCunqfpetswDg
# You can replace this text with custom code or comments, and it will be preserved on regeneration

33
Koha/SearchEngine/Elasticsearch/QueryBuilder.pm

@ -29,8 +29,8 @@ provides something that can be given to elasticsearch to get answers.
=head1 SYNOPSIS
use Koha::SearchEngine::Elasticsearch;
$builder = Koha::SearchEngine::Elasticsearch->new();
use Koha::SearchEngine::Elasticsearch::QueryBuilder;
$builder = Koha::SearchEngine::Elasticsearch->new({ index => $index });
my $simple_query = $builder->build_query("hello");
# This is currently undocumented because the original code is undocumented
my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators);
@ -39,7 +39,7 @@ provides something that can be given to elasticsearch to get answers.
=cut
use base qw(Class::Accessor);
use base qw(Koha::ElasticSearch);
use Carp;
use JSON;
use List::MoreUtils qw/ each_array /;
@ -102,6 +102,8 @@ sub build_query {
$d = 'asc' unless $d;
# TODO account for fields that don't have a 'phrase' type
$f = $self->_sort_field($f);
push @{ $res->{sort} }, { "$f.phrase" => { order => $d } };
}
}
@ -139,7 +141,7 @@ sub build_browse_query {
author => 1,
);
$field = 'title' if !exists $field_whitelist{$field};
my $sort = $self->_sort_field($field);
my $res = {
query => {
match_phrase_prefix => {
@ -150,7 +152,7 @@ sub build_browse_query {
}
}
},
sort => [ { "$field.phrase" => { order => "asc" } } ],
sort => [ { "$sort.phrase" => { order => "asc" } } ],
};
}
@ -303,7 +305,8 @@ sub build_authorities_query {
if ( exists $search->{sort} ) {
my %s;
foreach my $k ( keys %{ $search->{sort} } ) {
$s{"$k.phrase"} = $search->{sort}{$k};
my $f = $self->_sort_field($k);
$s{"$f.phrase"} = $search->{sort}{$k};
}
$search->{sort} = \%s;
}
@ -733,4 +736,22 @@ sub _fix_limit_special_cases {
return \@new_lim;
}
=head2 _sort_field
my $field = $self->_sort_field($field);
Given a field name, this works out what the actual name of the version to sort
on should be. Often it's the same, sometimes it involves sticking "__sort" on
the end. Maybe it'll be something else in the future, who knows?
=cut
sub _sort_field {
my ($self, $f) = @_;
if ($self->sort_fields()->{$f}) {
$f .= '__sort';
}
return $f;
}
1;

8
Koha/SearchEngine/Elasticsearch/Search.pm

@ -23,8 +23,10 @@ Koha::SearchEngine::ElasticSearch::Search - search functions for Elasticsearch
=head1 SYNOPSIS
my $searcher = Koha::SearchEngine::ElasticSearch::Search->new();
my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new();
my $searcher =
Koha::SearchEngine::ElasticSearch::Search->new( { index => $index } );
my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new(
{ index => $index } );
my $query = $builder->build_query('perl');
my $results = $searcher->search($query);
print "There were " . $results->total . " results.\n";
@ -321,7 +323,7 @@ sub simple_search_compat {
unless (ref $query) {
# We'll push it through the query builder to sanitise everything.
my $qb = Koha::SearchEngine::QueryBuilder->new();
my $qb = Koha::SearchEngine::QueryBuilder->new({index => $self->index});
(undef,$query) = $qb->build_query_compat(undef, [$query]);
}
my $results = $self->search($query, undef, $max_results, %options);

2
Koha/SearchEngine/QueryBuilder.pm

@ -33,7 +33,7 @@ and just get whatever querybuilder you need.
=head1 SYNOPSIS
use Koha::SearchEngine::QueryBuilder;
my $qb = Koha::SearchEngine::QueryBuilder->new();
my $qb = Koha::SearchEngine::QueryBuilder->new({index => $Koha::SearchEngine::BIBLIOS_INDEX});
=head1 METHODS

2
acqui/neworderbiblio.pl

@ -102,7 +102,7 @@ my @operands = $query;
my $QParser;
$QParser = C4::Context->queryparser if (C4::Context->preference('UseQueryParser'));
my $builtquery;
my $builder = Koha::SearchEngine::QueryBuilder->new();
my $builder = Koha::SearchEngine::QueryBuilder->new({index => $Koha::SearchEngine::BIBLIOS_INDEX});
my $searcher = Koha::SearchEngine::Search->new({index => $Koha::SearchEngine::BIBLIOS_INDEX});
if ($QParser) {
$builtquery = $query;

6
authorities/authorities-home.pl

@ -79,8 +79,10 @@ if ( $op eq "do_search" ) {
my $startfrom = $query->param('startfrom') || 1;
my $resultsperpage = $query->param('resultsperpage') || 20;
my $builder = Koha::SearchEngine::QueryBuilder->new();
my $searcher = Koha::SearchEngine::Search->new({index => 'authorities'});
my $builder = Koha::SearchEngine::QueryBuilder->new(
{ index => $Koha::SearchEngine::AUTHORITIES_INDEX } );
my $searcher = Koha::SearchEngine::Search->new(
{ index => $Koha::SearchEngine::AUTHORITIES_INDEX } );
my $search_query = $builder->build_authorities_query_compat(
[$marclist], [$and_or], [$excluding], [$operator],
[$value], $authtypecode, $orderby

6
catalogue/search.pl

@ -467,8 +467,10 @@ my $expanded_facet = $params->{'expand'};
# Define some global variables
my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type);
my $builder = Koha::SearchEngine::QueryBuilder->new();
my $searcher = Koha::SearchEngine::Search->new({index => 'biblios'});
my $builder = Koha::SearchEngine::QueryBuilder->new(
{ index => $Koha::SearchEngine::BIBLIOS_INDEX } );
my $searcher = Koha::SearchEngine::Search->new(
{ index => $Koha::SearchEngine::BIBLIOS_INDEX } );
my @results;

6
cataloguing/addbooks.pl

@ -77,8 +77,10 @@ if ($query) {
my $QParser;
$QParser = C4::Context->queryparser if (C4::Context->preference('UseQueryParser'));
my $builtquery;
my $builder = Koha::SearchEngine::QueryBuilder->new();
my $searcher = Koha::SearchEngine::Search->new({index => 'biblios'});
my $builder = Koha::SearchEngine::QueryBuilder->new(
{ index => $Koha::SearchEngine::BIBLIOS_INDEX } );
my $searcher = Koha::SearchEngine::Search->new(
{ index => $Koha::SearchEngine::BIBLIOS_INDEX } );
if ($QParser) {
$builtquery = $query;
} else {

22
installer/data/mysql/elasticsearch_mapping.sql

@ -11,6 +11,7 @@ CREATE TABLE `elasticsearch_mapping` (
`type` varchar(255) NOT NULL,
`facet` boolean DEFAULT FALSE,
`suggestible` boolean DEFAULT FALSE,
`sort` boolean DEFAULT NULL,
`marc21` varchar(255) DEFAULT NULL,
`unimarc` varchar(255) DEFAULT NULL,
`normarc` varchar(255) DEFAULT NULL,
@ -28,13 +29,22 @@ CREATE TABLE `search_field` (
-- This contains a MARC field specifier for a given index, marc type, and marc
-- field.
--
-- a note about the sort field:
-- * if all the entries for a mapping are 'null', nothing special is done with that mapping.
-- * if any of the entries are not null, then a __sort field is created in ES for this mapping. In this case:
-- * any mapping with sort == false WILL NOT get copied into a __sort field
-- * any mapping with sort == true or is null WILL get copied into a __sort field
-- * any sorts on the field name will be applied to $fieldname.'__sort' instead.
-- This means that a search on author can include 1xx, 245$c, and 7xx, while the sort only applies to 1xx.
CREATE TABLE `search_marc_map` (
id int(11) NOT NULL AUTO_INCREMENT,
index_name ENUM('biblios','authorities') NOT NULL COMMENT 'what storage index this map is for',
marc_type ENUM('marc21', 'unimarc', 'normarc') NOT NULL COMMENT 'what MARC type this map is for',
marc_field VARCHAR(255) NOT NULL COMMENT 'the MARC specifier for this field',
`facet` boolean DEFAULT FALSE COMMENT 'true if a facet field should be generated for this',
`suggestible` boolean DEFAULT FALSE COMMENT 'true if this field can be used to generate suggestions',
`suggestible` boolean DEFAULT FALSE COMMENT 'true if this field can be used to generate suggestions for browse',
`sort` boolean DEFAULT NULL COMMENT 'true/false creates special sort handling, null doesn''t',
PRIMARY KEY(`id`),
INDEX (`index_name`),
UNIQUE KEY (index_name, marc_type, marc_field)
@ -95,8 +105,8 @@ INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestib
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',TRUE,TRUE,'string','100a','200f','100a');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',TRUE,TRUE,'string','110a','200g','110a');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',TRUE,TRUE,'string','111a',NULL,'111a');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',TRUE,TRUE,'string','700a','700a','700a');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',FALSE,FALSE,'string','245c','701','245c');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `sort`,`type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',TRUE,TRUE,FALSE,'string','700a','700a','700a'); -- no sorting on the
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `sort`,`type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','author',FALSE,FALSE,FALSE,'string','245c','701','245c'); -- extra author fields
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','title',FALSE,TRUE,'string','245a','200a','245a');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','title',FALSE,TRUE,'string','246','200c','246');
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','title',FALSE,TRUE,'string','247','200d','247');
@ -280,9 +290,9 @@ INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestib
INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `suggestible`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('authorities','Match',FALSE,FALSE,'','511acdefghjklnpqstvxyz',NULL,'511acdefghjklnpqstvxyz');
-- temporary to convert into new table form
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible) select distinct indexname, 'marc21', marc21, facet, suggestible from elasticsearch_mapping where marc21 is not null;
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible) select distinct indexname, 'unimarc', unimarc, facet, suggestible from elasticsearch_mapping where unimarc is not null;
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible) select distinct indexname, 'normarc', normarc, facet, suggestible from elasticsearch_mapping where normarc is not null;
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible, sort) select distinct indexname, 'marc21', marc21, facet, suggestible, sort from elasticsearch_mapping where marc21 is not null;
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible, sort) select distinct indexname, 'unimarc', unimarc, facet, suggestible, sort from elasticsearch_mapping where unimarc is not null;
insert into search_marc_map(index_name, marc_type, marc_field, facet, suggestible, sort) select distinct indexname, 'normarc', normarc, facet, suggestible, sort from elasticsearch_mapping where normarc is not null;
insert into search_field (name, type) select distinct mapping, type from elasticsearch_mapping;
insert into search_marc_to_field(search_marc_map_id, search_field_id) select search_marc_map.id,search_field.id from search_field, search_marc_map, elasticsearch_mapping where elasticsearch_mapping.mapping=search_field.name AND elasticsearch_mapping.marc21=search_marc_map.marc_field AND search_marc_map.marc_type='marc21' AND indexname='biblios' AND index_name='biblios';

6
opac/opac-authorities-home.pl

@ -60,8 +60,10 @@ if ( $op eq "do_search" ) {
$resultsperpage = $query->param('resultsperpage');
$resultsperpage = 20 if ( !defined $resultsperpage );
my @tags;
my $builder = Koha::SearchEngine::QueryBuilder->new();
my $searcher = Koha::SearchEngine::Search->new({index => 'authorities'});
my $builder = Koha::SearchEngine::QueryBuilder->new(
{ index => $Koha::SearchEngine::AUTHORITIES_INDEX } );
my $searcher = Koha::SearchEngine::Search->new(
{ index => $Koha::SearchEngine::AUTHORITIES_INDEX } );
my $search_query = $builder->build_authorities_query_compat( \@marclist, \@and_or,
\@excluding, \@operator, \@value, $authtypecode, $orderby );
# use Data::Dumper;

2
opac/opac-search.pl

@ -38,7 +38,7 @@ use Koha::SearchEngine::QueryBuilder;
my $searchengine = C4::Context->preference("SearchEngine");
my ($builder, $searcher);
#$searchengine = 'Zebra'; # XXX
$builder = Koha::SearchEngine::QueryBuilder->new();
$builder = Koha::SearchEngine::QueryBuilder->new({index => 'biblios'});
$searcher = Koha::SearchEngine::Search->new({index => 'biblios'});
use C4::Output;

Loading…
Cancel
Save