From 374f1e6384437c8f7e8dd8d9a6ed8889ad3dd800 Mon Sep 17 00:00:00 2001 From: Robin Sheat Date: Fri, 17 Jul 2015 14:38:19 +1200 Subject: [PATCH] Bug 12478: ES is now updated when records are updated/deleted Signed-off-by: Nick Clemens Signed-off-by: Jesse Weaver Signed-off-by: Tomas Cohen Arazi Signed-off-by: Kyle M Hall Signed-off-by: Brendan Gallagher --- C4/Biblio.pm | 84 ++++++++++++------- Koha/ElasticSearch/Indexer.pm | 50 +++++++++-- .../Elasticsearch/QueryBuilder.pm | 7 +- .../data/mysql/elasticsearch_mapping.sql | 3 + misc/search_tools/rebuild_elastic_search.pl | 2 +- 5 files changed, 107 insertions(+), 39 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 06d0ac1d48..ea93b445f9 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -37,12 +37,15 @@ use C4::ClassSource; use C4::Charset; use C4::Linker; use C4::OAI::Sets; +use C4::Debug; use Koha::Cache; use Koha::Authority::Types; use Koha::Acquisition::Currencies; +use Koha::SearchEngine; use vars qw(@ISA @EXPORT); +use vars qw($debug $cgi_debug); BEGIN { @@ -2888,39 +2891,68 @@ sub TransformMarcToKohaOneField { =head2 ModZebra - ModZebra( $biblionumber, $op, $server ); + ModZebra( $biblionumber, $op, $server, $record ); $biblionumber is the biblionumber we want to index -$op is specialUpdate or delete, and is used to know what we want to do +$op is specialUpdate or recordDelete, and is used to know what we want to do $server is the server that we want to update +$record is the update MARC record if it's available. If it's not supplied +and is needed, it'll be loaded from the database. 
+ =cut sub ModZebra { ###Accepts a $server variable thus we can use it for biblios authorities or other zebra dbs - my ( $biblionumber, $op, $server ) = @_; - my $dbh = C4::Context->dbh; + my ( $biblionumber, $op, $server, $record ) = @_; + $debug && warn "ModZebra: update requested for: $biblionumber $op $server\n"; + if ( C4::Context->preference('SearchEngine') eq 'Elasticsearch' ) { - # true ModZebra commented until indexdata fixes zebraDB crashes (it seems they occur on multiple updates - # at the same time - # replaced by a zebraqueue table, that is filled with ModZebra to run. - # the table is emptied by rebuild_zebra.pl script (using the -z switch) + # TODO abstract to a standard API that'll work for whatever + require Koha::ElasticSearch::Indexer; + my $indexer = Koha::ElasticSearch::Indexer->new( + { + index => $server eq 'biblioserver' + ? $Koha::SearchEngine::BIBLIOS_INDEX + : $Koha::SearchEngine::AUTHORITIES_INDEX + } + ); + if ( $op eq 'specialUpdate' ) { + unless ($record) { + $record = GetMarcBiblio($biblionumber, 1); + } + my $records = [$record]; + $indexer->update_index_background( [$biblionumber], [$record] ); + } + elsif ( $op eq 'recordDelete' ) { + $indexer->delete_index_background( [$biblionumber] ); + } + else { + croak "ModZebra called with unknown operation: $op"; + } + } else { + my $dbh = C4::Context->dbh; - my $check_sql = "SELECT COUNT(*) FROM zebraqueue - WHERE server = ? - AND biblio_auth_number = ? - AND operation = ? 
- AND done = 0"; - my $check_sth = $dbh->prepare_cached($check_sql); - $check_sth->execute( $server, $biblionumber, $op ); - my ($count) = $check_sth->fetchrow_array; - $check_sth->finish(); - if ( $count == 0 ) { - my $sth = $dbh->prepare("INSERT INTO zebraqueue (biblio_auth_number,server,operation) VALUES(?,?,?)"); - $sth->execute( $biblionumber, $server, $op ); - $sth->finish; + # true ModZebra commented until indexdata fixes zebraDB crashes (it seems they occur on multiple updates + # at the same time + # replaced by a zebraqueue table, that is filled with ModZebra to run. + # the table is emptied by rebuild_zebra.pl script (using the -z switch) + my $check_sql = "SELECT COUNT(*) FROM zebraqueue + WHERE server = ? + AND biblio_auth_number = ? + AND operation = ? + AND done = 0"; + my $check_sth = $dbh->prepare_cached($check_sql); + $check_sth->execute( $server, $biblionumber, $op ); + my ($count) = $check_sth->fetchrow_array; + $check_sth->finish(); + if ( $count == 0 ) { + my $sth = $dbh->prepare("INSERT INTO zebraqueue (biblio_auth_number,server,operation) VALUES(?,?,?)"); + $sth->execute( $biblionumber, $server, $op ); + $sth->finish; + } } } @@ -3468,15 +3500,7 @@ sub ModBiblioMarc { $sth = $dbh->prepare("UPDATE biblioitems SET marc=?,marcxml=? 
WHERE biblionumber=?"); $sth->execute( $record->as_usmarc(), $record->as_xml_record($encoding), $biblionumber ); $sth->finish; - if ( C4::Context->preference('SearchEngine') eq 'ElasticSearch' ) { -# shift to its on sub, so it can do it realtime or queue - can_load( modules => { 'Koha::ElasticSearch::Indexer' => undef } ); - # need to get this from syspref probably biblio/authority for index - my $indexer = Koha::ElasticSearch::Indexer->new(); - my $records = [$record]; - $indexer->update_index([$biblionumber], $records); - } - ModZebra( $biblionumber, "specialUpdate", "biblioserver" ); + ModZebra( $biblionumber, "specialUpdate", "biblioserver", $record ); return $biblionumber; } diff --git a/Koha/ElasticSearch/Indexer.pm b/Koha/ElasticSearch/Indexer.pm index cc450dc5fc..ab196a79f8 100644 --- a/Koha/ElasticSearch/Indexer.pm +++ b/Koha/ElasticSearch/Indexer.pm @@ -34,8 +34,9 @@ Koha::ElasticSearch::Indexer - handles adding new records to the index =head1 SYNOPSIS - my $indexer = Koha::ElasticSearch::Indexer->new({ index => 'biblios' }); - $indexer->delete_index(); + my $indexer = Koha::ElasticSearch::Indexer->new( + { index => Koha::SearchEngine::BIBLIOS_INDEX } ); + $indexer->drop_index(); $indexer->update_index(\@biblionumbers, \@records); =head1 FUNCTIONS @@ -61,6 +62,8 @@ If that's a problem, clone them first. sub update_index { my ($self, $biblionums, $records) = @_; + # TODO should have a separate path for dealing with a large number + # of records at once where we use the bulk update functions in ES. if ($biblionums) { $self->_sanitise_records($biblionums, $records); } @@ -100,14 +103,51 @@ sub update_index_background { $self->update_index(@_); } -=head2 $indexer->delete_index(); +=head2 $indexer->delete_index($biblionums) -Deletes the index from the elasticsearch server. Calling C<update_index> -after this will recreate it again. +C<$biblionums> is an arrayref of biblionumbers to delete from the index.
=cut sub delete_index { + my ($self, $biblionums) = @_; + + if ( !$self->store ) { + my $params = $self->get_elasticsearch_params(); + $self->store( + Catmandu::Store::ElasticSearch->new( + %$params, + index_settings => $self->get_elasticsearch_settings(), + index_mappings => $self->get_elasticsearch_mappings(), + trace_calls => 1, + ) + ); + } + $self->store->bag->delete($_) foreach @$biblionums; + $self->store->bag->commit; +} + +=head2 $indexer->delete_index_background($biblionums) + +Identical to L</delete_index>, this will return immediately and start a +background process to do the actual deleting. + +=cut + +# TODO implement in the future + +sub delete_index_background { + my $self = shift; + $self->delete_index(@_); +} +=head2 $indexer->drop_index(); + +Drops the index from the elasticsearch server. Calling C<update_index> +after this will recreate it again. + +=cut + +sub drop_index { my ($self) = @_; if (!$self->store) { diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index c89f3e67e5..386073c6f4 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -494,6 +494,7 @@ our %index_field_convert = ( 'fic' => 'lf', 'mus' => 'rtype', 'aud' => 'ta', + 'hi' => 'Host-Item-Number', ); sub _convert_index_fields { @@ -683,11 +684,11 @@ to ensure those parts are correct. sub _clean_search_term { my ( $self, $term ) = @_; - $term = $self->_convert_index_strings_freeform($term); - $term =~ s/[{}]/"/g; # Some hardcoded searches (like with authorities) produce things like - # 'an=123', when it ought to be 'an:123'. + # 'an=123', when it ought to be 'an:123' for our purposes.
$term =~ s/=/:/g; + $term = $self->_convert_index_strings_freeform($term); + $term =~ s/[{}]/"/g; return $term; } diff --git a/installer/data/mysql/elasticsearch_mapping.sql b/installer/data/mysql/elasticsearch_mapping.sql index 92717ee4da..6af436c780 100644 --- a/installer/data/mysql/elasticsearch_mapping.sql +++ b/installer/data/mysql/elasticsearch_mapping.sql @@ -1,3 +1,5 @@ +-- For now I'm keeping this form of table as it's easier to edit. When we get +-- an interface, then we can then use the real form directly. DROP TABLE IF EXISTS elasticsearch_mapping; DROP TABLE IF EXISTS search_marc_to_field; DROP TABLE IF EXISTS search_field; @@ -228,6 +230,7 @@ INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `m INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','an',FALSE,'number',NULL,'6179',NULL); INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','an',FALSE,'number',NULL,'6209',NULL); INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','an',FALSE,'number',NULL,'6219',NULL); +INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('biblios','Host-Item-Number',FALSE,'number','7739','4619','7739'); -- Authorities: incomplete INSERT INTO `elasticsearch_mapping` (`indexname`, `mapping`, `facet`, `type`, `marc21`, `unimarc`, `normarc`) VALUES ('authorities','Local-number',FALSE,'string','001',NULL,'001'); diff --git a/misc/search_tools/rebuild_elastic_search.pl b/misc/search_tools/rebuild_elastic_search.pl index 84bb110e3c..4cbecdb73f 100755 --- a/misc/search_tools/rebuild_elastic_search.pl +++ b/misc/search_tools/rebuild_elastic_search.pl @@ -157,7 +157,7 @@ sub do_reindex { # We know it's safe to not recreate the indexer because update_index # hasn't been 
called yet. - $indexer->delete_index(); + $indexer->drop_index(); } my $count = 0; -- 2.39.5