From 7dbd13e66f0ada4f0a4fa116f08def230a7858bf Mon Sep 17 00:00:00 2001 From: Robin Sheat Date: Tue, 16 Sep 2014 17:38:43 +1200 Subject: [PATCH] Bug 12478 - pile of elasticsearch code Signed-off-by: Nick Clemens Signed-off-by: Jesse Weaver Signed-off-by: Tomas Cohen Arazi Signed-off-by: Kyle M Hall Signed-off-by: Brendan Gallagher --- C4/Biblio.pm | 8 + C4/Search.pm | 10 +- Koha/ElasticSearch.pm | 323 ++++++++++++ Koha/ElasticSearch/Indexer.pm | 155 ++++++ Koha/ElasticSearch/Search.pm | 230 ++++++++ Koha/Schema/Result/ElasticsearchMapping.pm | 105 ++++ .../Elasticsearch/QueryBuilder.pm | 498 ++++++++++++++++++ Koha/SearchEngine/Zebra/Search.pm | 76 +++ Koha/Template/Plugin/Price.pm | 7 + .../data/mysql/elasticsearch_mapping.sql | 148 ++++++ installer/data/mysql/kohastructure.sql | 15 + .../en/modules/admin/preferences/admin.pref | 9 + misc/search_tools/rebuild_elastic_search.pl | 148 ++++++ opac/elasticsearch.pl | 102 ++++ opac/opac-search.pl | 38 +- t/Koha_ElasticSearch.t | 23 + t/Koha_ElasticSearch_Indexer.t | 51 ++ t/Koha_ElasticSearch_Search.t | 38 ++ 18 files changed, 1978 insertions(+), 6 deletions(-) create mode 100644 Koha/ElasticSearch.pm create mode 100644 Koha/ElasticSearch/Indexer.pm create mode 100644 Koha/ElasticSearch/Search.pm create mode 100644 Koha/Schema/Result/ElasticsearchMapping.pm create mode 100644 Koha/SearchEngine/Elasticsearch/QueryBuilder.pm create mode 100644 Koha/SearchEngine/Zebra/Search.pm create mode 100644 installer/data/mysql/elasticsearch_mapping.sql create mode 100755 misc/search_tools/rebuild_elastic_search.pl create mode 100755 opac/elasticsearch.pl create mode 100644 t/Koha_ElasticSearch.t create mode 100644 t/Koha_ElasticSearch_Indexer.t create mode 100644 t/Koha_ElasticSearch_Search.t diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 9a7c86db72..06d0ac1d48 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -3468,6 +3468,14 @@ sub ModBiblioMarc { $sth = $dbh->prepare("UPDATE biblioitems SET marc=?,marcxml=? 
WHERE biblionumber=?"); $sth->execute( $record->as_usmarc(), $record->as_xml_record($encoding), $biblionumber ); $sth->finish; + if ( C4::Context->preference('SearchEngine') eq 'ElasticSearch' ) { +# shift to its on sub, so it can do it realtime or queue + can_load( modules => { 'Koha::ElasticSearch::Indexer' => undef } ); + # need to get this from syspref probably biblio/authority for index + my $indexer = Koha::ElasticSearch::Indexer->new(); + my $records = [$record]; + $indexer->update_index([$biblionumber], $records); + } ModZebra( $biblionumber, "specialUpdate", "biblioserver" ); return $biblionumber; } diff --git a/C4/Search.pm b/C4/Search.pm index 9f8f219254..d18b121811 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -2536,9 +2536,9 @@ sub _ZOOM_event_loop { } } -=head2 new_record_from_zebra +=head2 new_record_from_searchengine -Given raw data from a Zebra result set, return a MARC::Record object +Given raw data from a searchengine result set, return a MARC::Record object This helper function is needed to take into account all the involved system preferences and configuration variables to properly create the @@ -2547,6 +2547,8 @@ MARC::Record object. If we are using GRS-1, then the raw data we get from Zebra should be USMARC data. If we are using DOM, then it has to be MARCXML. +If we are using elasticsearch, it'll already be a MARC::Record. + =cut sub new_record_from_zebra { @@ -2557,6 +2559,10 @@ sub new_record_from_zebra { my $index_mode = ( $server eq 'biblioserver' ) ? 
C4::Context->config('zebra_bib_index_mode') // 'dom' : C4::Context->config('zebra_auth_index_mode') // 'dom'; + my $search_engine = C4::Context->preference("SearchEngine"); + if ($search_engine eq 'Elasticsearch') { + return $raw_data; + } my $marc_record = eval { if ( $index_mode eq 'dom' ) { diff --git a/Koha/ElasticSearch.pm b/Koha/ElasticSearch.pm new file mode 100644 index 0000000000..f2f287f899 --- /dev/null +++ b/Koha/ElasticSearch.pm @@ -0,0 +1,323 @@ +package Koha::ElasticSearch; + +# Copyright 2013 Catalyst IT +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +use base qw(Class::Accessor); + +use C4::Context; +use Carp; +use Elasticsearch; +use Koha::Database; +use Modern::Perl; + +use Data::Dumper; # TODO remove + +__PACKAGE__->mk_ro_accessors(qw( index )); + +=head1 NAME + +Koha::ElasticSearch - Base module for things using elasticsearch + +=head1 ACCESSORS + +=over 4 + +=item index + +The name of the index to use, generally 'biblios' or 'authorities'. 
+ +=back + +=head1 FUNCTIONS + +=cut + +sub new { + my $class = shift @_; + my $self = $class->SUPER::new(@_); + # Check for a valid index + croak('No index name provided') unless $self->index; + return $self; +} + +=head2 get_elasticsearch_params + + my $params = $self->get_elasticsearch_params(); + +This provides a hashref that contains the parameters for connecting to the +ElasicSearch servers, in the form: + + { + 'servers' => ['127.0.0.1:9200', 'anotherserver:9200'], + 'index_name' => 'koha_instance', + } + +This is configured by the following in the C block in koha-conf.xml: + + + 127.0.0.1:9200 + anotherserver:9200 + koha_instance + + +=cut + +sub get_elasticsearch_params { + my ($self) = @_; + + # Copy the hash so that we're not modifying the original + my $es = { %{ C4::Context->config('elasticsearch') } }; + die "No 'elasticsearch' block is defined in koha-conf.xml.\n" if ( !$es ); + + # Helpfully, the multiple server lines end up in an array for us anyway + # if there are multiple ones, but not if there's only one. + my $server = $es->{server}; + delete $es->{server}; + if ( ref($server) eq 'ARRAY' ) { + + # store it called 'servers' + $es->{servers} = $server; + } + elsif ($server) { + $es->{servers} = [$server]; + } + else { + die "No elasticsearch servers were specified in koha-conf.xml.\n"; + } + die "No elasticserver index_name was specified in koha-conf.xml.\n" + if ( !$es->{index_name} ); + # Append the name of this particular index to our namespace + $es->{index_name} .= '_' . $self->index; + return $es; +} + +=head2 get_elasticsearch_settings + + my $settings = $self->get_elasticsearch_settings(); + +This provides the settings provided to elasticsearch when an index is created. +These can do things like define tokenisation methods. + +A hashref containing the settings is returned. + +=cut + +sub get_elasticsearch_settings { + my ($self) = @_; + + # Ultimately this should come from a file or something, and not be + # hardcoded. 
+ my $settings = { + index => { + analysis => { + analyzer => { + analyser_phrase => { + tokenizer => 'keyword', + filter => 'lowercase', + }, + analyser_standard => { + tokenizer => 'standard', + filter => 'lowercase', + } + } + } + } + }; + return $settings; +} + +=head2 get_elasticsearch_mappings + + my $mappings = $self->get_elasticsearch_mappings(); + +This provides the mappings that get passed to elasticsearch when an index is +created. + +=cut + +sub get_elasticsearch_mappings { + my ($self) = @_; + + my $mappings = { + data => { + properties => { + record => { + store => "yes", + include_in_all => "false", + type => "string", + }, + } + } + }; + $self->_foreach_mapping( + sub { + my ( undef, $name, $type, $facet ) = @_; + + # TODO if this gets any sort of complexity to it, it should + # be broken out into its own function. + + # TODO be aware of date formats, but this requires pre-parsing + # as ES will simply reject anything with an invalid date. + my $es_type = + $type eq 'boolean' + ? 'boolean' + : 'string'; + $mappings->{data}{properties}{$name} = { + search_analyzer => "analyser_standard", + index_analyzer => "analyser_standard", + type => $es_type, + fields => { + phrase => { + search_analyzer => "analyser_phrase", + index_analyzer => "analyser_phrase", + type => "string" + }, + }, + }; + $mappings->{data}{properties}{$name}{null_value} = 0 + if $type eq 'boolean'; + if ($facet) { + $mappings->{data}{properties}{ $name . '__facet' } = { + type => "string", + index => "not_analyzed", + }; + } + } + ); + return $mappings; +} + +# Provides the rules for data conversion. 
+sub get_fixer_rules { + my ($self) = @_; + + my $marcflavour = lc C4::Context->preference('marcflavour'); + my @rules; + $self->_foreach_mapping( + sub { + my ( undef, $name, $type, $facet, $marcs ) = @_; + my $field = $marcs->{$marcflavour}; + return unless defined $marcs->{$marcflavour}; + my $options = ''; + + # There's a bug when using 'split' with something that + # selects a range + # The split makes everything into nested arrays, but that's not + # really a big deal, ES doesn't mind. + $options = '-split => 1' unless $field =~ m|_/| || $type eq 'sum'; + push @rules, "marc_map('$field','${name}', $options)"; + if ($facet) { + push @rules, "marc_map('$field','${name}__facet', $options)"; + } + if ( $type eq 'boolean' ) { + + # boolean gets special handling, basically if it doesn't exist, + # it's added and set to false. Otherwise we can't query it. + push @rules, + "unless exists('$name') add_field('$name', 0) end"; + } + if ($type eq 'sum' ) { + push @rules, "sum('$name')"; + } + } + ); + + return \@rules; +} + +=head2 _foreach_mapping + + $self->_foreach_mapping( + sub { + my ( $id, $name, $type, $facet, $marcs ) = @_; + my $marc = $marcs->{marc21}; + } + ); + +This allows you to apply a function to each entry in the elasticsearch mappings +table, in order to build the mappings for whatever is needed. + +In the provided function, the files are: + +=over 4 + +=item C<$id> + +An ID number, corresponding to the entry in the database. + +=item C<$name> + +The field name for elasticsearch (corresponds to the 'mapping' column in the +database. + +=item C<$type> + +The type for this value, e.g. 'string'. + +=item C<$facet> + +True if this value should be facetised. This only really makes sense if the +field is understood by the facet processing code anyway. + +=item C<$marc> + +A hashref containing the MARC field specifiers for each MARC type. It's quite +possible for this to be undefined if there is otherwise an entry in a +different MARC form. 
+ +=back + +=cut + +sub _foreach_mapping { + my ( $self, $sub ) = @_; + + # TODO use a caching framework here + my $database = Koha::Database->new(); + my $schema = $database->schema(); + my $rs = $schema->resultset('ElasticsearchMapping')->search(); + for my $row ( $rs->all ) { + $sub->( + $row->id, + $row->mapping, + $row->type, + $row->facet, + { + marc21 => $row->marc21, + unimarc => $row->unimarc, + normarc => $row->normarc + } + ); + } +} + +1; + +__END__ + +=head1 AUTHOR + +=over 4 + +=item Chris Cormack C<< >> + +=item Robin Sheat C<< >> + +=back + +=cut diff --git a/Koha/ElasticSearch/Indexer.pm b/Koha/ElasticSearch/Indexer.pm new file mode 100644 index 0000000000..b7d609706a --- /dev/null +++ b/Koha/ElasticSearch/Indexer.pm @@ -0,0 +1,155 @@ +package Koha::ElasticSearch::Indexer; + +# Copyright 2013 Catalyst IT +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +use Carp; +use Modern::Perl; +use base qw(Koha::ElasticSearch); +use Data::Dumper; + +# For now just marc, but we can do anything here really +use Catmandu::Importer::MARC; +use Catmandu::Store::ElasticSearch; + +Koha::ElasticSearch::Indexer->mk_accessors(qw( store )); + +=head1 NAME + +Koha::ElasticSearch::Indexer - handles adding new records to the index + +=head1 SYNOPSIS + + my $indexer = Koha::ElasticSearch::Indexer->new({ index => 'biblios' }); + $indexer->delete_index(); + $indexer->update_index(\@biblionumbers, \@records); + +=head1 FUNCTIONS + +=cut + +=head2 $indexer->update_index($biblionums, $records); + +C<$biblionums> is an arrayref containing the biblionumbers for the records. + +C<$records> is an arrayref containing the Ls themselves. + +The values in the arrays must match up, and the 999$c value in the MARC record +will be rewritten using the values in C<$biblionums> to ensure they are correct. +If C<$biblionums> is C, this won't happen, but you should be sure that +999$c is correct on your own then. + +Note that this will modify the original record if C<$biblionums> is supplied. +If that's a problem, clone them first. + +=cut + +sub update_index { + my ($self, $biblionums, $records) = @_; + + if ($biblionums) { + $self->_sanitise_records($biblionums, $records); + } + + my $from = $self->_convert_marc_to_json($records); + if ( !$self->store ) { + my $params = $self->get_elasticsearch_params(); + $self->store( + Catmandu::Store::ElasticSearch->new( + %$params, + index_settings => $self->get_elasticsearch_settings(), + index_mappings => $self->get_elasticsearch_mappings(), + #trace_calls => 1, + ) + ); + } + $self->store->bag->add_many($from); + $self->store->bag->commit; + return 1; +} + +=head2 $indexer->delete_index(); + +Deletes the index from the elasticsearch server. Calling C +after this will recreate it again. + +=cut + +sub delete_index { + my ($self) = @_; + + if (!$self->store) { + # If this index doesn't exist, this will create it. 
Then it'll be + # deleted. That's not the end of the world however. + my $params = $self->get_elasticsearch_params(); + $self->store( + Catmandu::Store::ElasticSearch->new( + %$params, + index_settings => $self->get_elasticsearch_settings(), + index_mappings => $self->get_elasticsearch_mappings(), + #trace_calls => 1, + ) + ); + } + $self->store->drop(); + $self->store(undef); +} + +sub _sanitise_records { + my ($self, $biblionums, $records) = @_; + + confess "Unequal number of values in \$biblionums and \$records." if (@$biblionums != @$records); + + my $c = @$biblionums; + for (my $i=0; $i<$c; $i++) { + my $bibnum = $biblionums->[$i]; + my $rec = $records->[$i]; + # I've seen things you people wouldn't believe. Attack ships on fire + # off the shoulder of Orion. I watched C-beams glitter in the dark near + # the Tannhauser gate. MARC records where 999$c doesn't match the + # biblionumber column. All those moments will be lost in time... like + # tears in rain... + $rec->delete_fields($rec->field('999')); + $rec->append_fields(MARC::Field->new('999','','','c' => $bibnum, 'd' => $bibnum)); + } +} + +sub _convert_marc_to_json { + my $self = shift; + my $records = shift; + my $importer = + Catmandu::Importer::MARC->new( records => $records, id => '999c' ); + my $fixer = Catmandu::Fix->new( fixes => $self->get_fixer_rules() ); + $importer = $fixer->fix($importer); + return $importer; +} + +1; + +__END__ + +=head1 AUTHOR + +=over 4 + +=item Chris Cormack C<< >> + +=item Robin Sheat C<< >> + +=back + +=cut diff --git a/Koha/ElasticSearch/Search.pm b/Koha/ElasticSearch/Search.pm new file mode 100644 index 0000000000..3b1ed7e592 --- /dev/null +++ b/Koha/ElasticSearch/Search.pm @@ -0,0 +1,230 @@ +package Koha::ElasticSearch::Search; + +# Copyright 2014 Catalyst IT +# +# This file is part of Koha. 
+# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +=head1 NAME + +Koha::ElasticSearch::Search - search functions for Elasticsearch + +=head1 SYNOPSIS + + my $searcher = Koha::ElasticSearch::Search->new(); + my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new(); + my $query = $builder->build_query('perl'); + my $results = $searcher->search($query); + print "There were " . $results->total . " results.\n"; + $results->each(sub { + push @hits, @_[0]; + }); + +=head1 METHODS + +=cut + +use base qw(Koha::ElasticSearch); +use Koha::ItemTypes; + +use Catmandu::Store::ElasticSearch; + +use Data::Dumper; #TODO remove +use Carp qw(cluck); + +Koha::ElasticSearch::Search->mk_accessors(qw( store )); + +=head2 search + + my $results = $searcher->search($query, $page, $count); + +Run a search using the query. It'll return C<$count> results, starting at page +C<$page> (C<$page> counts from 1, anything less that, or C becomes 1.) + +C<%options> is a hash containing extra options: + +=over 4 + +=item offset + +If provided, this overrides the C<$page> value, and specifies the record as +an offset (i.e. the number of the record to start with), rather than a page. 
+ +=back + +=cut + +sub search { + my ($self, $query, $page, $count, %options) = @_; + + my $params = $self->get_elasticsearch_params(); + my %paging; + $paging{limit} = $count || 20; + # ES doesn't want pages, it wants a record to start from. + if (exists $options{offset}) { + $paging{start} = $options{offset}; + } else { + $page = (!defined($page) || ($page <= 0)) ? 1 : $page - 1; + $paging{start} = $page * $paging{limit}; + } + $self->store( + Catmandu::Store::ElasticSearch->new( + %$params, + trace_calls => 0, + ) + ); + my $results = $self->store->bag->search( %$query, %paging ); + return $results; +} + +=head2 search_compat + + my ( $error, $results, $facets ) = $search->search_compat( + $query, $simple_query, \@sort_by, \@servers, + $results_per_page, $offset, $expanded_facet, $branches, + $query_type, $scan + ) + +A search interface somewhat compatible with LgetRecords>. Anything +that is returned in the query created by build_query_compat will probably +get ignored here. + +=cut + +sub search_compat { + my ( + $self, $query, $simple_query, $sort_by, + $servers, $results_per_page, $offset, $expanded_facet, + $branches, $query_type, $scan + ) = @_; + + my %options; + $options{offset} = $offset; + my $results = $self->search($query, undef, $results_per_page, %options); + + # Convert each result into a MARC::Record + my (@records, $index); + $index = $offset; # opac-search expects results to be put in the + # right place in the array, according to $offset + $results->each(sub { + # The results come in an array for some reason + my $marc_json = @_[0]->{record}; + my $marc = $self->json2marc($marc_json); + $records[$index++] = $marc; + }); + # consumers of this expect a name-spaced result, we provide the default + # configuration. 
+ my %result; + $result{biblioserver}{hits} = $results->total; + $result{biblioserver}{RECORDS} = \@records; + return (undef, \%result, $self->_convert_facets($results->{facets})); +} + +=head2 json2marc + + my $marc = $self->json2marc($marc_json); + +Converts the form of marc (based on its JSON, but as a Perl structure) that +Catmandu stores into a MARC::Record object. + +=cut + +sub json2marc { + my ( $self, $marcjson ) = @_; + + my $marc = MARC::Record->new(); + $marc->encoding('UTF-8'); + + # fields are like: + # [ '245', '1', '2', 'a' => 'Title', 'b' => 'Subtitle' ] + # conveniently, this is the form that MARC::Field->new() likes + foreach $field (@$marcjson) { + next if @$field < 5; # Shouldn't be possible, but... + if ( $field->[0] eq 'LDR' ) { + $marc->leader( $field->[4] ); + } + else { + my $marc_field = MARC::Field->new(@$field); + $marc->append_fields($marc_field); + } + } + return $marc; +} + +=head2 _convert_facets + + my $koha_facets = _convert_facets($es_facets); + +Converts elasticsearch facets types to the form that Koha expects. +It expects the ES facet name to match the Koha type, for example C, +C, C, etc. + +=cut + +sub _convert_facets { + my ( $self, $es ) = @_; + + return undef if !$es; + + # These should correspond to the ES field names, as opposed to the CCL + # things that zebra uses. + my %type_to_label = ( + author => 'Authors', + location => 'Location', + itype => 'ItemTypes', + se => 'Series', + subject => 'Topics', + 'su-geo' => 'Places', + ); + + # We also have some special cases, e.g. itypes that need to show the + # value rather than the code. + my $itypes = Koha::ItemTypes->new(); + my %special = ( itype => sub { $itypes->get_description_for_code(@_) }, ); + my @res; + while ( ( $type, $data ) = each %$es ) { + next if !exists( $type_to_label{$type} ); + my $facet = { + type_id => $type . 
'_id', + expand => $type, + expandable => 1, # TODO figure how that's supposed to work + "type_label_$type_to_label{$type}" => 1, + type_link_value => $type, + }; + foreach my $term ( @{ $data->{terms} } ) { + my $t = $term->{term}; + my $c = $term->{count}; + if ( exists( $special{$type} ) ) { + $label = $special{$type}->($t); + } + else { + $label = $t; + } + push @{ $facet->{facets} }, { + facet_count => $c, + facet_link_value => $t, + facet_title_value => $t . " ($c)", + facet_label_value => $label, # TODO either truncate this, + # or make the template do it like it should anyway + type_link_value => $type, + }; + } + push @res, $facet if exists $facet->{facets}; + } + return \@res; +} + + +1; diff --git a/Koha/Schema/Result/ElasticsearchMapping.pm b/Koha/Schema/Result/ElasticsearchMapping.pm new file mode 100644 index 0000000000..f37009a0d0 --- /dev/null +++ b/Koha/Schema/Result/ElasticsearchMapping.pm @@ -0,0 +1,105 @@ +use utf8; +package Koha::Schema::Result::ElasticsearchMapping; + +# Created by DBIx::Class::Schema::Loader +# DO NOT MODIFY THE FIRST PART OF THIS FILE + +=head1 NAME + +Koha::Schema::Result::ElasticsearchMapping + +=cut + +use strict; +use warnings; + +use base 'DBIx::Class::Core'; + +=head1 TABLE: C + +=cut + +__PACKAGE__->table("elasticsearch_mapping"); + +=head1 ACCESSORS + +=head2 id + + data_type: 'integer' + is_auto_increment: 1 + is_nullable: 0 + +=head2 mapping + + data_type: 'varchar' + is_nullable: 1 + size: 255 + +=head2 type + + data_type: 'varchar' + is_nullable: 1 + size: 255 + +=head2 facet + + data_type: 'tinyint' + default_value: 0 + is_nullable: 1 + +=head2 marc21 + + data_type: 'varchar' + is_nullable: 1 + size: 255 + +=head2 unimarc + + data_type: 'varchar' + is_nullable: 1 + size: 255 + +=head2 normarc + + data_type: 'varchar' + is_nullable: 1 + size: 255 + +=cut + +__PACKAGE__->add_columns( + "id", + { data_type => "integer", is_auto_increment => 1, is_nullable => 0 }, + "mapping", + { data_type => "varchar", is_nullable 
=> 1, size => 255 }, + "type", + { data_type => "varchar", is_nullable => 1, size => 255 }, + "facet", + { data_type => "tinyint", default_value => 0, is_nullable => 1 }, + "marc21", + { data_type => "varchar", is_nullable => 1, size => 255 }, + "unimarc", + { data_type => "varchar", is_nullable => 1, size => 255 }, + "normarc", + { data_type => "varchar", is_nullable => 1, size => 255 }, +); + +=head1 PRIMARY KEY + +=over 4 + +=item * L + +=back + +=cut + +__PACKAGE__->set_primary_key("id"); + + +# Created by DBIx::Class::Schema::Loader v0.07040 @ 2014-06-06 16:20:16 +# DO NOT MODIFY THIS OR ANYTHING ABOVE! md5sum:uGRmWU0rshP6awyLMQYJeQ + + +# You can replace this text with custom code or comments, and it will be preserved on regeneration +1; diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm new file mode 100644 index 0000000000..184ecefcf6 --- /dev/null +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -0,0 +1,498 @@ +package Koha::SearchEngine::Elasticsearch::QueryBuilder; + +# This file is part of Koha. +# +# Copyright 2014 Catalyst IT Ltd. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . 
+ +=head1 NAME + +Koha::SearchEngine::Elasticsearch::QueryBuilder - constructs elasticsearch +query objects from user-supplied queries + +=head1 DESCRIPTION + +This provides the functions that take a user-supplied search query, and +provides something that can be given to elasticsearch to get answers. + +=head1 SYNOPSIS + + use Koha::SearchEngine::Elasticsearch; + $builder = Koha::SearchEngine::Elasticsearch->new(); + my $simple_query = $builder->build_query("hello"); + # This is currently undocumented because the original code is undocumented + my $adv_query = $builder->build_advanced_query($indexes, $operands, $operators); + +=head1 METHODS + +=cut + +use base qw(Class::Accessor); +use List::MoreUtils qw/ each_array /; +use Modern::Perl; +use URI::Escape; + +use Data::Dumper; # TODO remove + +=head2 build_query + + my $simple_query = $builder->build_query("hello", %options) + +This will build a query that can be issued to elasticsearch from the provided +string input. This expects a lucene style search form (see +L +for details.) + +It'll make an attempt to respect the various query options. + +Additional options can be provided with the C<%options> hash. + +=over 4 + +=item sort + +This should be an arrayref of hashrefs, each containing a C and an +C (optional, defaults to C.) The results will be sorted +according to these values. Valid values for C are 'asc' and 'desc'. + +=back + +=cut + +sub build_query { + my ( $self, $query, %options ) = @_; + + my $stemming = C4::Context->preference("QueryStemming") || 0; + my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; + my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; + my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + + $query = '*' unless defined $query; + + my $res; + $res->{query} = { + query_string => { + query => $query, + fuzziness => $fuzzy_enabled ? 
'auto' : '0', + default_operator => "AND", + default_field => "_all", + } + }; + + if ( $options{sort} ) { + foreach my $sort ( @{ $options{sort} } ) { + my ( $f, $d ) = @$sort{qw/ field direction /}; + die "Invalid sort direction, $d" + if $d && ( $d ne 'asc' && $d ne 'desc' ); + $d = 'asc' unless $d; + + # TODO account for fields that don't have a 'phrase' type + push @{ $res->{sort} }, { "$f.phrase" => { order => $d } }; + } + } + + # See _convert_facets in Search.pm for how these get turned into + # things that Koha can use. + $res->{facets} = { + author => { terms => { field => "author__facet" } }, + subject => { terms => { field => "subject__facet" } }, + itype => { terms => { field => "itype__facet" } }, + }; + return $res; +} + +=head2 build_browse_query + + my $browse_query = $builder->build_browse_query($field, $query); + +This performs a "starts with" style query on a particular field. The field +to be searched must have been indexed with an appropriate mapping as a +"phrase" subfield. + +=cut + +sub build_browse_query { + my ( $self, $field, $query ) = @_; + + my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + + return { query => '*' } if !defined $query; + + # TODO this should come from Koha::Elasticsearch + my %field_whitelist = ( + title => 1, + author => 1, + ); + $field = 'title' if !exists $field_whitelist{$field}; + + my $res = { + query => { + match_phrase_prefix => { + "$field.phrase" => { + query => $query, + operator => 'or', + fuzziness => $fuzzy_enabled ? 'auto' : '0', + } + } + }, + sort => [ { "$field.phrase" => { order => "asc" } } ], + }; +} + +=head2 build_query_compat + + my ( + $error, $query, $simple_query, $query_cgi, + $query_desc, $limit, $limit_cgi, $limit_desc, + $stopwords_removed, $query_type + ) + = $builder->build_query_compat( \@operators, \@operands, \@indexes, + \@limits, \@sort_by, $scan, $lang ); + +This handles a search using the same api as L does. 
+ +A very simple query will go in with C<$operands> set to ['query'], and +C<$sort_by> set to ['pubdate_dsc']. This simple case will return with +C<$query> set to something that can perform the search, C<$simple_query> +set to just the search term, C<$query_cgi> set to something that can +reproduce this search, and C<$query_desc> set to something else. + +=cut + +sub build_query_compat { + my ( $self, $operators, $operands, $indexes, $orig_limits, $sort_by, $scan, + $lang ) + = @_; + +#die Dumper ( $self, $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang ); + my @sort_params = $self->_convert_sort_fields(@$sort_by); + my @index_params = $self->_convert_index_fields(@$indexes); + my $limits = $self->_fix_limit_special_cases($orig_limits); + + # Merge the indexes in with the search terms and the operands so that + # each search thing is a handy unit. + unshift @$operators, undef; # The first one can't have an op + my @search_params; + my $ea = each_array( @$operands, @$operators, @index_params ); + while ( my ( $oand, $otor, $index ) = $ea->() ) { + next if ( !defined($oand) || $oand eq '' ); + push @search_params, { + operand => $self->_clean_search_term($oand), # the search terms + operator => defined($otor) ? uc $otor : undef, # AND and so on + $index ? %$index : (), + }; + } + + # We build a string query from limits and the queries. An alternative + # would be to pass them separately into build_query and let it build + # them into a structured ES query itself. Maybe later, though that'd be + # more robust. 
+ my $query_str = join( ' AND ', + join( ' ', $self->_create_query_string(@search_params) ), + $self->_join_queries( $self->_convert_index_strings(@$limits) ) ); + + # If there's no query on the left, let's remove the junk left behind + $query_str =~ s/^ AND //; + my %options; + $options{sort} = \@sort_params; + my $query = $self->build_query( $query_str, %options ); + + #die Dumper($query); + # We roughly emulate the CGI parameters of the zebra query builder + my $query_cgi = 'idx=kw&q=' . uri_escape( $operands->[0] ) if @$operands; + my $simple_query = $operands->[0] if @$operands == 1; + my $query_desc = $simple_query; + my $limit = 'and ' . join( ' and ', @$limits ); + my $limit_cgi = + '&limit=' . join( '&limit=', map { uri_escape($_) } @$orig_limits ); + my $limit_desc = "@$limits"; + + return ( + undef, $query, $simple_query, $query_cgi, $query_desc, + $limit, $limit_cgi, $limit_desc, undef, undef + ); +} + +=head2 _convert_sort_fields + + my @sort_params = _convert_sort_fields(@sort_by) + +Converts the zebra-style sort index information into elasticsearch-style. + +C<@sort_by> is the same as presented to L, and it returns +something that can be sent to L. + +=cut + +sub _convert_sort_fields { + my ( $self, @sort_by ) = @_; + + # Turn the sorting into something we care about. + my %sort_field_convert = ( + acqdate => 'acqdate', + author => 'author', + call_number => 'callnum', + popularity => 'issues', + relevance => undef, # default + title => 'title', + pubdate => 'pubdate', + ); + my %sort_order_convert = + ( qw( dsc desc ), qw( asc asc ), qw( az asc ), qw( za desc ) ); + + # Convert the fields and orders, drop anything we don't know about. 
+    grep { $_->{field} } map {
+        my ( $f, $d ) = split /_/;
+        {
+            field     => $sort_field_convert{$f},
+            direction => $sort_order_convert{$d}
+        }
+    } @sort_by;
+}
+
+=head2 _convert_index_fields
+
+    my @index_params = $self->_convert_index_fields(@indexes);
+
+Converts zebra-style search index notation into elasticsearch-style.
+
+C<@indexes> is an array of index names, as presented to L<build_query_compat>,
+and it returns something that can be sent to L<build_query>.
+
+B<TODO>: this will pull from the elasticsearch mappings table to figure out
+types.
+
+=cut
+
+our %index_field_convert = (
+    'kw'       => '_all',
+    'ti'       => 'title',
+    'au'       => 'author',
+    'su'       => 'subject',
+    'nb'       => 'isbn',
+    'se'       => 'title-series',
+    'callnum'  => 'callnum',
+    'mc-itype' => 'itype',
+    'ln'       => 'ln',
+    'branch'   => 'homebranch',
+    'fic'      => 'lf',
+    'mus'      => 'rtype',
+    'aud'      => 'ta',
+);
+
+sub _convert_index_fields {
+    my ( $self, @indexes ) = @_;
+
+    my %index_type_convert =
+      ( __default => undef, phr => 'phrase', rtrn => 'right-truncate' );
+
+    # Convert according to our table, drop anything that doesn't convert
+    grep { $_->{field} } map {
+        my ( $f, $t ) = split /,/;
+        {
+            field => $index_field_convert{$f},
+            type  => $index_type_convert{ $t // '__default' }
+        }
+    } @indexes;
+}
+
+=head2 _convert_index_strings
+
+    my @searches = $self->_convert_index_strings(@searches);
+
+Similar to L<_convert_index_fields>, this takes strings of the form
+B<field:term> and rewrites the field from zebra-style to
+elasticsearch-style. Anything it doesn't understand is returned verbatim.
+
+=cut
+
+sub _convert_index_strings {
+    my ( $self, @searches ) = @_;
+
+    my @res;
+    foreach my $s (@searches) {
+        next if $s eq '';
+        my ( $field, $term ) = $s =~ /^\s*([\w,-]*?):(.*)/;
+        unless ( defined($field) && defined($term) ) {
+            push @res, $s;
+            next;
+        }
+        my ($conv) = $self->_convert_index_fields($field);
+        unless ( defined($conv) ) {
+            push @res, $s;
+            next;
+        }
+        push @res, $conv->{field} . ":"
+          .
$self->_modify_string_by_type( %$conv, operand => $term );
+    }
+    return @res;
+}
+
+=head2 _modify_string_by_type
+
+    my $str = $self->_modify_string_by_type(%index_field);
+
+If you have a search term (operand) and a type (phrase, right-truncated), this
+will convert the string to have the function in lucene search terms, e.g.
+wrapping quotes around it.
+
+=cut
+
+sub _modify_string_by_type {
+    my ( $self, %idx ) = @_;
+
+    my $type = $idx{type} || '';
+    my $str  = $idx{operand};
+    return $str unless $str;    # Empty or undef, we can't use it.
+
+    $str .= '*' if $type eq 'right-truncate';
+    $str = '"' . $str . '"' if $type eq 'phrase';
+    return $str;
+}
+
+=head2 _convert_index_strings_freeform
+
+    my $search = $self->_convert_index_strings_freeform($search);
+
+This is similar to L<_convert_index_strings>, however it'll search out the
+things to change within the string. So it can handle strings such as
+C<(su:foo) AND (su:bar)>, converting the C<su> appropriately.
+
+=cut
+
+sub _convert_index_strings_freeform {
+    my ( $self, $search ) = @_;
+
+    while ( my ( $zeb, $es ) = each %index_field_convert ) {
+        $search =~ s/\b$zeb:/$es:/g;
+    }
+    return $search;
+}
+
+=head2 _join_queries
+
+    my $query_str = $self->_join_queries(@query_parts);
+
+This takes a list of query parts, that might be search terms on their own, or
+booleaned together, or specifying fields, or whatever, wraps them in
+parentheses, and ANDs them all together. Suitable for feeding to the ES
+query string query.
+
+=cut
+
+sub _join_queries {
+    my ( $self, @parts ) = @_;
+
+    @parts = grep { defined($_) && $_ ne '' } @parts;
+    return () unless @parts;
+    return $parts[0] if @parts < 2;
+    join ' AND ', map { "($_)" } @parts;
+}
+
+=head2 _make_phrases
+
+    my @phrased_queries = $self->_make_phrases(@query_parts);
+
+This takes the supplied queries and forces them to be phrases by wrapping
+quotes around them. It understands field prefixes, e.g. 'subject:' and puts
+the quotes outside of them if they're there.
+ +=cut + +sub _make_phrases { + my ( $self, @parts ) = @_; + map { s/^\s*(\w*?:)(.*)$/$1"$2"/r } @parts; +} + +=head2 _create_query_string + + my @query_strings = $self->_create_query_string(@queries); + +Given a list of hashrefs, it will turn them into a lucene-style query string. +The hash should contain field, type (both for the indexes), operator, and +operand. + +=cut + +sub _create_query_string { + my ( $self, @queries ) = @_; + + map { + my $otor = $_->{operator} ? $_->{operator} . ' ' : ''; + my $field = $_->{field} ? $_->{field} . ':' : ''; + + my $oand = $self->_modify_string_by_type(%$_); + "$otor($field$oand)"; + } @queries; +} + +=head2 _clean_search_term + + my $term = $self->_clean_search_term($term); + +This cleans a search term by removing any funny characters that may upset +ES and give us an error. It also calls L<_convert_index_strings_freeform> +to ensure those parts are correct. + +=cut + +sub _clean_search_term { + my ( $self, $term ) = @_; + + $term = $self->_convert_index_strings_freeform($term); + $term =~ s/[{}]/"/g; + return $term; +} + +=head2 _fix_limit_special_cases + + my $limits = $self->_fix_limit_special_cases($limits); + +This converts any special cases that the limit specifications have into things +that are more readily processable by the rest of the code. + +The argument should be an arrayref, and it'll return an arrayref. 
+ +=cut + +sub _fix_limit_special_cases { + my ( $self, $limits ) = @_; + + my @new_lim; + foreach my $l (@$limits) { + + # This is set up by opac-search.pl + if ( $l =~ /^yr,st-numeric,ge=/ ) { + my ( $start, $end ) = + ( $l =~ /^yr,st-numeric,ge=(.*) and yr,st-numeric,le=(.*)$/ ); + next unless defined($start) && defined($end); + push @new_lim, "copydate:[$start TO $end]"; + } + elsif ( $l =~ /^yr,st-numeric=/ ) { + my ($date) = ( $l =~ /^yr,st-numeric=(.*)$/ ); + next unless defined($date); + push @new_lim, "copydate:$date"; + } + elsif ( $l =~ /^available$/ ) { + push @new_lim, 'onloan:false'; + } + else { + push @new_lim, $l; + } + } + return \@new_lim; +} + +1; diff --git a/Koha/SearchEngine/Zebra/Search.pm b/Koha/SearchEngine/Zebra/Search.pm new file mode 100644 index 0000000000..535b4286bf --- /dev/null +++ b/Koha/SearchEngine/Zebra/Search.pm @@ -0,0 +1,76 @@ +package Koha::SearchEngine::Zebra::Search; + +# This file is part of Koha. +# +# Copyright 2012 BibLibre +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . + +# I don't think this ever worked right +#use Moose::Role; +#with 'Koha::SearchEngine::SearchRole'; + +use base qw(Class::Accessor); +# Removed because it doesn't exist/doesn't work. 
+#use Data::SearchEngine::Zebra; +#use Data::SearchEngine::Query; +#use Koha::SearchEngine::Zebra; +#use Data::Dump qw(dump); + +use C4::Search; # :( + +# Broken without the Data:: stuff +#has searchengine => ( +# is => 'rw', +# isa => 'Koha::SearchEngine::Zebra', +# default => sub { Koha::SearchEngine::Zebra->new }, +# lazy => 1 +#); + +sub search { + my ($self,$query_string) = @_; + + my $query = Data::SearchEngine::Query->new( + count => 10, + page => 1, + query => $query_string, + ); + + warn "search for $query_string"; + + my $results = $self->searchengine->search($query); + + foreach my $item (@{ $results->items }) { + my $title = $item->get_value('ste_title'); + #utf8::encode($title); + print "$title\n"; + warn dump $title; + } +} + +=head2 search_compat + +This passes straight through to C4::Search::getRecords. + +=cut + +sub search_compat { + shift; # get rid of $self + + return getRecords(@_); +} + +sub dosmth {'bou' } + +1; diff --git a/Koha/Template/Plugin/Price.pm b/Koha/Template/Plugin/Price.pm index 44f7cd8170..73b6871c58 100644 --- a/Koha/Template/Plugin/Price.pm +++ b/Koha/Template/Plugin/Price.pm @@ -17,6 +17,7 @@ package Koha::Template::Plugin::Price; # You should have received a copy of the GNU General Public License # along with Koha; if not, see . +use base qw(Class::Accessor); use Modern::Perl; use Template::Plugin::Filter; @@ -34,4 +35,10 @@ sub filter { : Koha::Number::Price->new( $value )->format; } +sub build_query_compat { + # Because this passes directly on to C4::Search, we have no trouble being + # compatible. 
+ build_query(@_); +} + 1; diff --git a/installer/data/mysql/elasticsearch_mapping.sql b/installer/data/mysql/elasticsearch_mapping.sql new file mode 100644 index 0000000000..6a0dd0d668 --- /dev/null +++ b/installer/data/mysql/elasticsearch_mapping.sql @@ -0,0 +1,148 @@ +DROP TABLE IF EXISTS elasticsearch_mapping; +CREATE TABLE `elasticsearch_mapping` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `mapping` varchar(255) DEFAULT NULL, + `type` varchar(255) NOT NULL, + `facet` boolean DEFAULT FALSE, + `marc21` varchar(255) DEFAULT NULL, + `unimarc` varchar(255) DEFAULT NULL, + `normarc` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=126 DEFAULT CHARSET=utf8; + + + +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('llength',FALSE,'','leader_/1-5',NULL,'leader_/1-5'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('rtype',FALSE,'','leader_/6',NULL,'leader_/6'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('bib-level',FALSE,'','leader_/7',NULL,'leader_/7'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('control-number',FALSE,'','001',NULL,'001'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('local-number',FALSE,'',NULL,'001',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('date-time-last-modified',FALSE,'','005','099d',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('microform-generation',FALSE,'','007_/11',NULL,'007_/11'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('material-type',FALSE,'','007','200b','007'); +INSERT INTO `elasticsearch_mapping` (`mapping`, 
`facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff7-00',FALSE,'','007_/1',NULL,'007_/1'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff7-01',FALSE,'','007_/2',NULL,'007_/2'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff7-02',FALSE,'','007_/3',NULL,'007_/3'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff7-01-02',FALSE,'','007_/1-2',NULL,'007_/1-2'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('date-entered-on-file',FALSE,'','008_/1-5','099c','008_/1-5'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('pubdate',FALSE,'','008_/7-10','100a_/9-12','008_/7-10'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('pl',FALSE,'','008_/15-17','210a','008_/15-17'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ta',FALSE,'','008_/22','100a_/17','008_/22'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff8-23',FALSE,'','008_/23',NULL,'008_/23'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ff8-29',FALSE,'','008_/29','105a_/8','008_/29'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('lf',FALSE,'','008_/33','105a_/11','008_/33'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('bio',FALSE,'','008_/34','105a_/12','008_/34'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ln',FALSE,'','008_/35-37','101a','008_/35-37'); +INSERT INTO 
`elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('ctype',FALSE,'','008_/24-27','105a_/4-7','008_/24-27'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('record-source',FALSE,'','008_/39','995c','008_/39'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('lc-cardnumber',FALSE,'','010','995j','010'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('lc-cardnumber',FALSE,'','011',NULL,NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','010',NULL,'010'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','011',NULL,NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('bnb-card-number',FALSE,'','015',NULL,'015'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('bgf-number',FALSE,'','015',NULL,'015'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('number-db',FALSE,'','015',NULL,'015'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('number-natl-biblio',FALSE,'','015',NULL,'015'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','015',NULL,'015'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('number-legal-deposit',FALSE,'','017',NULL,NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','017',NULL,NULL); +INSERT 
INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','018',NULL,NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','020a','010az','020a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('isbn',FALSE,'','020a','010az','020a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('identifier-standard',FALSE,'','022a','011ayz','022a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('issn',FALSE,'','022a','011ayz','022a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('author',TRUE,'string','100a','200f','100a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('author',TRUE,'string','110a','200g','110a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('author',TRUE,'string','111a',NULL,'111a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('author',TRUE,'string','700a','700a','700a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('author',FALSE,'string','245c','701','245c'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','245a','200a','245a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','246','200c','246'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','247','200d','247'); +INSERT INTO 
`elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','490','200e','490a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','505t','200h',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','711t','200i','711t'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','700t','205','700t'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','710t','304a','710t'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','730','327a','730'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','740','327b','740'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','780','327c','780'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','785','327d','785'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','130','327e','130'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','210','327f','210'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','211','327g',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','212','327h',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, 
`type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','214','327i',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','222','328t','222'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string','240','410t','240'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'411t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'412t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'413t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'421t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'422t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'423t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'424t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'425t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'430t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'431t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES 
('title',FALSE,'string',NULL,'432t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'433t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'434t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'435t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'436t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'437t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'440t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'441t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'442t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'443t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'444t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'445t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'446t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'447t',NULL); +INSERT INTO 
`elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'448t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'451t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'452t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'453t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'454t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'455t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'456t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'461t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'462t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'463t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'464t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'470t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'481t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, 
`normarc`) VALUES ('title',FALSE,'string',NULL,'482t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('title',FALSE,'string',NULL,'488t',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','600a','600a','600a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','600t','600','600t'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','610a','601','610a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','610t','602','610t'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','611','604','611'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','630n','605','630n'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','630r','606','630r'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650a','607','650a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650b',NULL,'650b'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650c',NULL,'650c'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650d',NULL,'650d'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES 
('subject',TRUE,'string','650v',NULL,'650v'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650x',NULL,'650x'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650y',NULL,'650y'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','650z',NULL,'650z'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','651','608','651'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('subject',TRUE,'string','653a','610','653'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('local-classification',FALSE,'','952o','995k','952o'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('local-classification',FALSE,'',NULL,'686',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('local-number',FALSE,'','999c','001','999c'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('local-number',FALSE,'',NULL,'0909',NULL); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('itype',TRUE,'string','942c','200b','942c'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('itype',TRUE,'string','952y','995r','952y'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('acqdate',FALSE,'date','952d','9955','952y'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES 
('place',TRUE,'string','260a','210a','260a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('publisher',TRUE,'string','260b','210c','260b'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('copydate',TRUE,'date','260c',NULL,'260c'); -- No copydate for unimarc? Seems strange. +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('homebranch',TRUE,'string','952a','995b','952a'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('holdingbranch',TRUE,'string','952b','995c','952b'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('onloan',FALSE,'boolean','952q','995n','952q'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('itemnumber',FALSE,'number','9529','9959','9529'); +INSERT INTO `elasticsearch_mapping` (`mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('issues',FALSE,'sum','952l',NULL,'952l'); -- Apparently not tracked in unimarc diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql index 242755dff6..c6129e2fbc 100644 --- a/installer/data/mysql/kohastructure.sql +++ b/installer/data/mysql/kohastructure.sql @@ -981,6 +981,21 @@ CREATE TABLE `deleteditems` ( KEY `itype_idx` (`itype`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +-- +-- Table structure for table `elasticsearch_mapping` +-- + +DROP TABLE IF EXISTS `elasticsearch_mapping`; +CREATE TABLE `elasticsearch_mapping` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + `mapping` varchar(255) DEFAULT NULL, + `type` varchar(255) DEFAULT NULL, + `marc21` varchar(255) DEFAULT NULL, + `unimarc` varchar(255) DEFAULT NULL, + `normarc` varchar(255) DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB 
AUTO_INCREMENT=24 DEFAULT CHARSET=utf8; + -- -- Table structure for table `export_format` -- diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref index fe88fde281..c9b723019f 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref @@ -211,3 +211,12 @@ Administration: subscription: "subscription" - will be shown on the Hea Koha community website. - Note that this value has no effect if the UsageStats system preference is set to "Don't share" + Search Engine: + - + - pref: SearchEngine + default: Zebra + choices: + Solr: Solr + Zebra: Zebra + Elasticsearch: Elasticsearch + - is the search engine used. diff --git a/misc/search_tools/rebuild_elastic_search.pl b/misc/search_tools/rebuild_elastic_search.pl new file mode 100755 index 0000000000..055bd1e087 --- /dev/null +++ b/misc/search_tools/rebuild_elastic_search.pl @@ -0,0 +1,148 @@ +#!/usr/bin/perl + +# This inserts records from a Koha database into elastic search + +# Copyright 2014 Catalyst IT +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +=head1 NAME + +rebuild_elastic_search.pl - inserts records from a Koha database into Elasticsearch + +=head1 SYNOPSIS + +B<rebuild_elastic_search.pl> +[B<-c|--commit>=C<count>] +[B<-v|--verbose>] +[B<-h|--help>] +[B<--man>] + +=head1 DESCRIPTION + +=head1 OPTIONS + +=over + +=item B<-c|--commit>=C<count> + +Specify how many records will be batched up before they're added to Elasticsearch. +Higher should be faster, but will cause more RAM usage. Default is 100. + +=item B<-d|--delete> + +Delete the index and recreate it before indexing. + +=item B<-b|--biblionumber> + +Only index the supplied biblionumber, mostly for testing purposes. May be +repeated. + +=item B<-v|--verbose> + +By default, this program only emits warnings and errors. This makes it talk +more. Add more to make it even more wordy, in particular when debugging. + +=item B<-h|--help> + +Help! + +=item B<--man> + +Full documentation. + +=cut + +use autodie; +use Getopt::Long; +use Koha::Biblio; +use Koha::ElasticSearch::Indexer; +use MARC::Field; +use MARC::Record; +use Modern::Perl; +use Pod::Usage; + +use Data::Dumper; # TODO remove + +my $verbose = 0; +my $commit = 100; +my ($delete, $help, $man); +my (@biblionumbers); + +GetOptions( + 'c|commit=i' => \$commit, + 'd|delete' => \$delete, + 'b|biblionumber=i' => \@biblionumbers, + 'v|verbose!' => \$verbose, + 'h|help' => \$help, + 'man' => \$man, +); + +pod2usage(1) if $help; +pod2usage( -exitstatus => 0, -verbose => 2 ) if $man; + +my $next; +if (@biblionumbers) { + $next = sub { + my $r = shift @biblionumbers; + return () unless defined $r; + return ($r, Koha::Biblio->get_marc_biblio($r, item_data => 1)); + }; +} else { + my $records = Koha::Biblio->get_all_biblios_iterator(); + $next = sub { + $records->next(); + } +} +my $indexer = Koha::ElasticSearch::Indexer->new({index => 'biblios' }); +if ($delete) { + # We know it's safe to not recreate the indexer because update_index + # hasn't been called yet.
+ $indexer->delete_index(); +} + +my $count = 0; +my $commit_count = $commit; +my (@bibnums_buffer, @commit_buffer); +while (scalar(my ($bibnum, $rec) = $next->())) { + _log(1,"$bibnum\n"); + $count++; + + push @bibnums_buffer, $bibnum; + push @commit_buffer, $rec; + if (!(--$commit_count)) { + _log(2, "Committing...\n"); + $indexer->update_index(\@bibnums_buffer, \@commit_buffer); + $commit_count = $commit; + @bibnums_buffer = (); + @commit_buffer = (); + } +} +# There are probably uncommitted records +$indexer->update_index(\@bibnums_buffer, \@commit_buffer); +_log(1, "$count records indexed.\n"); + +# Output progress information. +# +# _log($level, $msg); +# +# Will output $msg if the verbosity setting is set to $level or more. Will +# not include a trailing newline. +sub _log { + my ($level, $msg) = @_; + + print $msg if ($verbose >= $level); +} diff --git a/opac/elasticsearch.pl b/opac/elasticsearch.pl new file mode 100755 index 0000000000..b9fccaddbc --- /dev/null +++ b/opac/elasticsearch.pl @@ -0,0 +1,102 @@ +#!/usr/bin/perl + +# Copyright 2013 Catalyst +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with Koha; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +use Modern::Perl; + +use C4::Context; +use CGI; +use C4::Auth; +use C4::Koha; +use C4::Output; + +# TODO this should use the moose thing that auto-picks.
+use Koha::SearchEngine::Elasticsearch::QueryBuilder; +use Koha::ElasticSearch::Search; + +my $cgi = new CGI; + +my $template_name; +my $template_type = "basic"; +if ( $cgi->param("idx") or $cgi->param("q") ) { + $template_name = 'search/results.tt'; +} +else { + $template_name = 'search/advsearch.tt'; + $template_type = 'advsearch'; +} + +# load the template +my ( $template, $borrowernumber, $cookie ) = get_template_and_user( + { + template_name => $template_name, + query => $cgi, + type => "opac", + authnotrequired => 1, + } +); +my %template_params; +my $format = $cgi->param("format") || 'html'; + +# load the Type stuff +my $itemtypes = GetItemTypes; + +my $page = $cgi->param("page") || 1; +my $count = + $cgi->param('count') + || C4::Context->preference('OPACnumSearchResults') + || 20; +my $q = $cgi->param("q"); + +my $searcher = Koha::ElasticSearch::Search->new(); +my $builder = Koha::SearchEngine::Elasticsearch::QueryBuilder->new(); +my $query; +if ($cgi->param('type') eq 'browse') { + $query = $builder->build_browse_query($cgi->param('browse_field') || undef, $q ); + $template_params{browse} = 1; +} else { + $query = $builder->build_query($q); +} +my $results = $searcher->search( $query, $page, $count ); +#my $results = $searcher->search( { "match_phrase_prefix" => { "title" => "the" } } ); + +# This is temporary, but will do the job for now. +my @hits; +$results->each(sub { + push @hits, { _source => $_[0] }; + }); +# Make a list of the page numbers +my @pages = map { { page => $_, current => ($_ == ( $page || 1)) } } 1 .. int($results->total / $count); +my $max_page = int($results->total / $count); +# Pager template params +$template->param( + SEARCH_RESULTS => \@hits, + PAGE_NUMBERS => \@pages, + total => $results->total, + previous_page => ( $page > 1 ? $page - 1 : undef ), + next_page => ( $page < $max_page ? 
$page + 1 : undef ), + follower_params => [ + { var => 'type', val => $cgi->param('type') }, + { var => 'q', val => $q }, + { var => 'count', val => $count }, + ], + %template_params, +); + +my $content_type = ( $format eq 'rss' or $format eq 'atom' ) ? $format : 'html'; +output_with_http_headers $cgi, $cookie, $template->output, $content_type; diff --git a/opac/opac-search.pl b/opac/opac-search.pl index 0e89100356..5814a2bf0b 100755 --- a/opac/opac-search.pl +++ b/opac/opac-search.pl @@ -30,6 +30,35 @@ use Modern::Perl; use C4::Context; use List::MoreUtils q/any/; +use Data::Dumper; # TODO remove + +use Koha::SearchEngine::Elasticsearch::QueryBuilder; +use Koha::ElasticSearch::Search; +use Koha::SearchEngine::Zebra::QueryBuilder; +use Koha::SearchEngine::Zebra::Search; + +my $searchengine = C4::Context->preference("SearchEngine"); +my ($builder, $searcher); +#$searchengine = 'Zebra'; # XXX +for ( $searchengine ) { + when ( /^Solr$/ ) { + warn "We use Solr"; + require 'opac/search.pl'; + exit; + } + when ( /^Zebra$/ ) { + $builder=Koha::SearchEngine::Zebra::QueryBuilder->new(); + $searcher=Koha::SearchEngine::Zebra::Search->new(); + } + when (/^Elasticsearch$/) { + # Should use the base QueryBuilder, but I don't have it wired up + # for moose yet. + $builder=Koha::SearchEngine::Elasticsearch::QueryBuilder->new(); +# $builder=Koha::SearchEngine::Zebra::QueryBuilder->new(); + $searcher=Koha::ElasticSearch::Search->new({index => 'biblios'}); + } +} + use C4::Output; use C4::Auth qw(:DEFAULT get_session); use C4::Languages qw(getLanguages); @@ -525,7 +554,7 @@ my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_ my @results; ## I. 
BUILD THE QUERY -( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang); +( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = $builder->build_query_compat(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang); sub _input_cgi_parse { my @elements; @@ -605,11 +634,12 @@ if ($tag) { $pasarParams .= '&simple_query=' . uri_escape_utf8($simple_query); $pasarParams .= '&query_type=' . uri_escape_utf8($query_type) if ($query_type); eval { - ($error, $results_hashref, $facets) = getRecords($query,$simple_query,\@sort_by,\@servers,$results_per_page,$offset,$expanded_facet,$branches,$itemtypes_nocategory,$query_type,$scan,1); - }; + ($error, $results_hashref, $facets) = $searcher->search_compat($query,$simple_query,\@sort_by,\@servers,$results_per_page,$offset,$expanded_facet,$branches,$itemtypes,$query_type,$scan,1); +}; } + # This sorts the facets into alphabetical order -if ($facets) { +if ($facets && @$facets) { foreach my $f (@$facets) { $f->{facets} = [ sort { uc($a->{facet_label_value}) cmp uc($b->{facet_label_value}) } @{ $f->{facets} } ]; } diff --git a/t/Koha_ElasticSearch.t b/t/Koha_ElasticSearch.t new file mode 100644 index 0000000000..8888f2c9c8 --- /dev/null +++ b/t/Koha_ElasticSearch.t @@ -0,0 +1,23 @@ +# +#=============================================================================== +# +# FILE: Koha_ElasticSearch.t +# +# DESCRIPTION: +# +# FILES: --- +# BUGS: --- +# NOTES: --- +# AUTHOR: Chris Cormack (rangi), chrisc@catalyst.net.nz +# ORGANIZATION: Koha Development Team +# VERSION: 1.0 +# CREATED: 09/12/13 08:56:44 +# REVISION: --- +#=============================================================================== + +use strict; +use warnings; + +use Test::More tests => 1; # last test to print + +use_ok('Koha::ElasticSearch'); diff --git a/t/Koha_ElasticSearch_Indexer.t 
b/t/Koha_ElasticSearch_Indexer.t new file mode 100644 index 0000000000..6de6a32bc2 --- /dev/null +++ b/t/Koha_ElasticSearch_Indexer.t @@ -0,0 +1,51 @@ +# +#=============================================================================== +# +# FILE: Koha_ElasticSearch_Indexer.t +# +# DESCRIPTION: +# +# FILES: --- +# BUGS: --- +# NOTES: --- +# AUTHOR: Chris Cormack (rangi), chrisc@catalyst.net.nz +# ORGANIZATION: Koha Development Team +# VERSION: 1.0 +# CREATED: 09/12/13 08:57:25 +# REVISION: --- +#=============================================================================== + +use strict; +use warnings; + +use Test::More tests => 5; # last test to print +use MARC::Record; + +use_ok('Koha::ElasticSearch::Indexer'); + +my $indexer; +ok( + $indexer = Koha::ElasticSearch::Indexer->new( + { + 'nodes' => ['localhost:9200'], + 'index' => 'mydb' + } + ), + 'Creating new indexer object' +); + +my $marc_record = MARC::Record->new(); +my $field = MARC::Field->new( '001', '1234567' ); +$marc_record->append_fields($field); +$field = MARC::Field->new( '020', '', '', 'a' => '1234567890123' ); +$marc_record->append_fields($field); +$field = MARC::Field->new( '245', '', '', 'a' => 'Title' ); +$marc_record->append_fields($field); + +my $records = [$marc_record]; +ok( my $converted = $indexer->convert_marc_to_json($records), + 'Convert some records' ); + +is( $converted->count, 1, 'One converted record' ); + +ok( $indexer->update_index($records), 'Update Index' ); diff --git a/t/Koha_ElasticSearch_Search.t b/t/Koha_ElasticSearch_Search.t new file mode 100644 index 0000000000..081b162e66 --- /dev/null +++ b/t/Koha_ElasticSearch_Search.t @@ -0,0 +1,38 @@ +# +#=============================================================================== +# +# FILE: Koha_ElasticSearch_Search.t +# +# DESCRIPTION: +# +# FILES: --- +# BUGS: --- +# NOTES: --- +# AUTHOR: Chris Cormack (rangi), chrisc@catalyst.net.nz +# ORGANIZATION: Koha Development Team +# VERSION: 1.0 +# CREATED: 09/12/13 09:43:29 +# 
REVISION: --- +#=============================================================================== + +use strict; +use warnings; + +use Test::More tests => 6; # last test to print + +use_ok('Koha::ElasticSearch::Search'); + +ok( + my $searcher = Koha::ElasticSearch::Search->new( + { 'nodes' => ['localhost:9200'], 'index' => 'mydb' } + ), + 'Creating a Koha::ElasticSearch::Search object' +); + +is( $searcher->index, 'mydb', 'Testing basic accessor' ); + +ok( $searcher->connect, 'Connect to ElasticSearch server' ); +ok( my $results = $searcher->search( { record => 'easy' } ), 'Do a search ' ); + +ok( my $marcresults = $searcher->marc_search( { record => 'Fish' } ), + 'Do a marc search' ); -- 2.39.5