From 4d1566d3244c765e2d7c13e941e0714f63bcd0ef Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Tue, 5 Jan 2021 14:42:20 +0100 Subject: [PATCH] Bug 27344: Implement Elastic's update_index_background using Koha::BackgroundJob This patch adds a background job submodule, UpdateElasticIndex, to deal with async ES index update (not the deletion). Using NYTProf (on a checkin): Without 618ms, executing 35676 statements and 26355 subroutine calls in 266 source files and 83 string evals. With 521ms, executing 13282 statements and 7979 subroutine calls in 195 source files and 26 string evals. However there are some problems with this patch: 1. We don't want *all* the index update to be in the background_jobs tabtle (we could add a filter on the list view) 2. We don't track the "progress" of the job as we are sending all the records to Elastic. It is okish in my opinion but it must be noted. Signed-off-by: Arthur Suzuki Signed-off-by: Martin Renvoize Signed-off-by: Fridolin Somers --- Koha/BackgroundJob.pm | 1 + Koha/BackgroundJob/UpdateElasticIndex.pm | 132 ++++++++++++++++++ Koha/SearchEngine/Elasticsearch/Indexer.pm | 53 ++++--- .../background_jobs_update_elastic_index.inc | 23 +++ .../prog/en/modules/admin/background_jobs.tt | 2 + 5 files changed, 188 insertions(+), 23 deletions(-) create mode 100644 Koha/BackgroundJob/UpdateElasticIndex.pm create mode 100644 koha-tmpl/intranet-tmpl/prog/en/includes/background_jobs_update_elastic_index.inc diff --git a/Koha/BackgroundJob.pm b/Koha/BackgroundJob.pm index c661746d7d..583ada7421 100644 --- a/Koha/BackgroundJob.pm +++ b/Koha/BackgroundJob.pm @@ -284,6 +284,7 @@ sub core_types_to_classes { batch_item_record_deletion => 'Koha::BackgroundJob::BatchDeleteItem', batch_item_record_modification => 'Koha::BackgroundJob::BatchUpdateItem', batch_hold_cancel => 'Koha::BackgroundJob::BatchCancelHold', + update_elastic_index => 'Koha::BackgroundJob::UpdateElasticIndex', }; } diff --git a/Koha/BackgroundJob/UpdateElasticIndex.pm b/Koha/BackgroundJob/UpdateElasticIndex.pm new file mode 100644 index 0000000000..700c1e0854 --- /dev/null +++ b/Koha/BackgroundJob/UpdateElasticIndex.pm @@ -0,0 +1,132 @@ +package Koha::BackgroundJob::UpdateElasticIndex; + +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . + +use Modern::Perl; +use JSON qw( encode_json decode_json ); + +use Koha::BackgroundJobs; +use Koha::DateUtils qw( dt_from_string ); +use C4::Biblio; +use C4::MarcModificationTemplates; + +use base 'Koha::BackgroundJob'; + +=head1 NAME + +Koha::BackgroundJob::UpdateElasticIndex - Update Elastic index + +This is a subclass of Koha::BackgroundJob. + +=head1 API + +=head2 Class methods + +=head3 job_type + +Define the job type of this job: update_elastic_index + +=cut + +sub job_type { + return 'update_elastic_index'; +} + +=head3 process + +Process the modification. + +=cut + +sub process { + my ( $self, $args ) = @_; + + my $job = Koha::BackgroundJobs->find( $args->{job_id} ); + + if ( !exists $args->{job_id} || !$job || $job->status eq 'cancelled' ) { + return; + } + + # FIXME If the job has already been started, but started again (worker has been restart for instance) + # Then we will start from scratch and so double process the same records + + my $job_progress = 0; + $job->started_on(dt_from_string) + ->progress($job_progress) + ->status('started') + ->store; + + my @record_ids = @{ $args->{record_ids} }; + my $record_server = $args->{record_server}; + + my $report = { + total_records => scalar @record_ids, + total_success => 0, + }; + + my @messages; + eval { + my $es_index = + $record_server eq "authorityserver" + ? $Koha::SearchEngine::AUTHORITIES_INDEX + : $Koha::SearchEngine::BIBLIOS_INDEX; + my $indexer = Koha::SearchEngine::Indexer->new({ index => $es_index }); + $indexer->update_index(\@record_ids); + }; + if ( $@ ) { + push @messages, { + type => 'error', + code => 'index_error', + error => $@, + + } + } else { + # FIXME This is not correct if some record_ids have been skipped + $report->{total_success} = scalar @record_ids; + } + + my $job_data = decode_json $job->data; + $job_data->{messages} = \@messages; + $job_data->{report} = $report; + + $job->ended_on(dt_from_string) + ->data(encode_json $job_data); + $job->status('finished') if $job->status ne 'cancelled'; + $job->store; +} + +=head3 enqueue + +Enqueue the new job + +=cut + +sub enqueue { + my ( $self, $args) = @_; + + return unless exists $args->{record_server}; + return unless exists $args->{record_ids}; + + my $record_server = $args->{record_server}; + my @record_ids = @{ $args->{record_ids} }; + + $self->SUPER::enqueue({ + job_size => scalar @record_ids, + job_args => {record_server => $record_server, record_ids => \@record_ids}, + }); +} + +1; diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm index 651a373401..1aa426a6a2 100644 --- a/Koha/SearchEngine/Elasticsearch/Indexer.pm +++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm @@ -26,6 +26,7 @@ use base qw(Koha::SearchEngine::Elasticsearch); use Koha::Exceptions; use Koha::Exceptions::Elasticsearch; use Koha::SearchEngine::Zebra::Indexer; +use Koha::BackgroundJob::UpdateElasticIndex; use C4::AuthoritiesMarc qw//; use C4::Biblio; use C4::Context; @@ -97,12 +98,28 @@ Arrayref of Cs. =cut sub update_index { - my ($self, $biblionums, $records) = @_; + my ($self, $record_ids, $records) = @_; + + my $index_record_ids; + unless ( $records && @$records ) { + for my $record_id ( sort { $a <=> $b } @$record_ids ) { + + next unless $record_id; + + my $record = $self->_get_record( $record_id ); + if( $record ){ + push @$records, $record; + push @$index_record_ids, $record_id; + } + } + } else { + $index_record_ids = $record_ids; + } my $documents = $self->marc_records_to_documents($records); my @body; - for (my $i = 0; $i < scalar @$biblionums; $i++) { - my $id = $biblionums->[$i]; + for (my $i = 0; $i < scalar @$index_record_ids; $i++) { + my $id = $index_record_ids->[$i]; my $document = $documents->[$i]; push @body, { index => { @@ -271,7 +288,7 @@ sub update_mappings { $self->set_index_status_ok(); } -=head2 update_index_background($biblionums, $records) +=head2 update_index_background($record_numbers, $server) This has exactly the same API as C however it'll return immediately. It'll start a background process that does the adding. @@ -281,12 +298,10 @@ it to be updated by a regular index cron job in the future. =cut -# TODO implement in the future - I don't know the best way of doing this yet. -# If fork: make sure process group is changed so apache doesn't wait for us. - sub update_index_background { - my $self = shift; - $self->update_index(@_); + my ( $self, $record_numbers, $server ) = @_; + + Koha::BackgroundJob::UpdateElasticIndex->new->enqueue({ record_ids => $record_numbers, record_server => $server }); } =head2 index_records @@ -307,19 +322,11 @@ sub index_records { $record_numbers = [$record_numbers] if ref $record_numbers ne 'ARRAY' && defined $record_numbers; $records = [$records] if ref $records ne 'ARRAY' && defined $records; if ( $op eq 'specialUpdate' ) { - my $index_record_numbers; if ($records){ - $index_record_numbers = $record_numbers; + $self->update_index( $record_numbers, $records ); } else { - foreach my $record_number ( @$record_numbers ){ - my $record = _get_record( $record_number, $server ); - if( $record ){ - push @$records, $record; - push @$index_record_numbers, $record_number; - } - } + $self->update_index_background( $record_numbers, $server ); } - $self->update_index_background( $index_record_numbers, $records ) if $index_record_numbers && $records; } elsif ( $op eq 'recordDelete' ) { $self->delete_index_background( $record_numbers ); @@ -329,10 +336,10 @@ sub index_records { } sub _get_record { - my ( $id, $server ) = @_; - return $server eq 'biblioserver' - ? C4::Biblio::GetMarcBiblio({ biblionumber => $id, embed_items => 1 }) - : C4::AuthoritiesMarc::GetAuthority($id); + my ( $self, $record_id ) = @_; + return $self->index eq $Koha::SearchEngine::BIBLIOS_INDEX + ? C4::Biblio::GetMarcBiblio({ biblionumber => $record_id, embed_items => 1 }) + : C4::AuthoritiesMarc::GetAuthority($record_id); } =head2 delete_index($biblionums) diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/background_jobs_update_elastic_index.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/background_jobs_update_elastic_index.inc new file mode 100644 index 0000000000..d35f06fee5 --- /dev/null +++ b/koha-tmpl/intranet-tmpl/prog/en/includes/background_jobs_update_elastic_index.inc @@ -0,0 +1,23 @@ +[% USE Koha %] + +[% BLOCK report %] + [% SET report = job.report %] + [% IF report %] + [% IF report.total_records == 1 %] + [% IF report.total_success == 1 %] +
The records have successfully been reindexed!
+ [% END %] + [% ELSE %] +
+ [% report.total_success | html %] / [% report.total_records | html %] records have successfully been reindexed. Some errors occurred. + [% IF job.status == 'cancelled' %]The job has been cancelled before it finished.[% END %] +
+ [% END %] + [% END %] +[% END %] + +[% BLOCK detail %] +[% END %] + +[% BLOCK js %] +[% END %] diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/background_jobs.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/background_jobs.tt index f516c39506..05703abaf1 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/background_jobs.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/background_jobs.tt @@ -37,6 +37,8 @@ Batch item record deletion [% CASE "batch_hold_cancel" %] Batch hold cancellation + [% CASE 'update_elastic_index' %] + Update Elasticsearch index [% CASE %]Unknown job type '[% job_type | html %]' [% END %] -- 2.39.5