From 2f8ae2ec212e67cbe004e15484cf561a77bc40a5 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Wed, 3 Jan 2024 18:54:25 +0000 Subject: [PATCH] Bug 35086: Also split chunks when indexing from background job The ES background indexer is designed to combine background jobs when started based on the 'batch_size' option. While this is helpful for combining individual updates, it can be problematic when there are several large batch modifications, or when the worker has stopped and is restarted. This patch uses the same logic as in the indexer to split the chunks that are sent directly for indexing. To test: 1 - Follow test plan on previous patch 2 - Confirm items are correctly indexed and jobs marked Signed-off-by: David Nind Signed-off-by: Jonathan Druart Signed-off-by: Katrin Fischer (cherry picked from commit 7496a603cda7832bcf1bdb606843bd88f18af4f3) Signed-off-by: Fridolin Somers --- misc/workers/es_indexer_daemon.pl | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/misc/workers/es_indexer_daemon.pl b/misc/workers/es_indexer_daemon.pl index b24d1902f3..44bcdbf9a5 100755 --- a/misc/workers/es_indexer_daemon.pl +++ b/misc/workers/es_indexer_daemon.pl @@ -55,6 +55,7 @@ use JSON qw( decode_json ); use Try::Tiny; use Pod::Usage; use Getopt::Long; +use List::MoreUtils qw( natatime ); use C4::Context; use Koha::Logger; @@ -95,8 +96,11 @@ if ( $conn ) { } ); } -my $biblio_indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::BIBLIOS_INDEX }); -my $auth_indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::AUTHORITIES_INDEX }); +my $biblio_indexer = Koha::SearchEngine::Indexer->new( { index => $Koha::SearchEngine::BIBLIOS_INDEX } ); +my $auth_indexer = Koha::SearchEngine::Indexer->new( { index => $Koha::SearchEngine::AUTHORITIES_INDEX } ); +my $config = $biblio_indexer->get_elasticsearch_params; +my $at_a_time = $config->{chunk_size} // 5000; + my @jobs = (); while (1) { @@ -177,18 
+181,24 @@ sub commit { } if (@auth_records) { - try { - $auth_indexer->update_index( \@auth_records ); - } catch { - $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); - }; + my $auth_chunks = natatime $at_a_time, @auth_records; + while ( ( my @auth_chunk = $auth_chunks->() ) ) { + try { + $auth_indexer->update_index( \@auth_chunk ); + } catch { + $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); + }; + } } if (@bib_records) { - try { - $biblio_indexer->update_index( \@bib_records ); - } catch { - $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); - }; + my $biblio_chunks = natatime $at_a_time, @bib_records; + while ( ( my @bib_chunk = $biblio_chunks->() ) ) { + try { + $biblio_indexer->update_index( \@bib_chunk ); + } catch { + $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); + }; + } } # Finish -- 2.39.5