From 7496a603cda7832bcf1bdb606843bd88f18af4f3 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Wed, 3 Jan 2024 18:54:25 +0000 Subject: [PATCH] Bug 35086: Also split chunks when indexing from background job The es background indexer is designed to combine background jobs when started based on the 'batch_size' option. While this is helpful for combining individual updates, it can be problematic when there are several large batch modifications, or when the worker has stopped and is restarted. This patch uses the same logic as in the indexer to split the chunks that are sent directly for indexing. To test: 1 - Follow test plan on previous patch 2 - Confirm items are correctly indexed and jobs marked Signed-off-by: David Nind Signed-off-by: Jonathan Druart Signed-off-by: Katrin Fischer --- misc/workers/es_indexer_daemon.pl | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/misc/workers/es_indexer_daemon.pl b/misc/workers/es_indexer_daemon.pl index b24d1902f3..44bcdbf9a5 100755 --- a/misc/workers/es_indexer_daemon.pl +++ b/misc/workers/es_indexer_daemon.pl @@ -55,6 +55,7 @@ use JSON qw( decode_json ); use Try::Tiny; use Pod::Usage; use Getopt::Long; +use List::MoreUtils qw( natatime ); use C4::Context; use Koha::Logger; @@ -95,8 +96,11 @@ if ( $conn ) { } ); } -my $biblio_indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::BIBLIOS_INDEX }); -my $auth_indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::AUTHORITIES_INDEX }); +my $biblio_indexer = Koha::SearchEngine::Indexer->new( { index => $Koha::SearchEngine::BIBLIOS_INDEX } ); +my $auth_indexer = Koha::SearchEngine::Indexer->new( { index => $Koha::SearchEngine::AUTHORITIES_INDEX } ); +my $config = $biblio_indexer->get_elasticsearch_params; +my $at_a_time = $config->{chunk_size} // 5000; + my @jobs = (); while (1) { @@ -177,18 +181,24 @@ sub commit { } if (@auth_records) { - try { - $auth_indexer->update_index( \@auth_records ); - 
} catch { - $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); - }; + my $auth_chunks = natatime $at_a_time, @auth_records; + while ( ( my @auth_chunk = $auth_chunks->() ) ) { + try { + $auth_indexer->update_index( \@auth_chunk ); + } catch { + $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); + }; + } } if (@bib_records) { - try { - $biblio_indexer->update_index( \@bib_records ); - } catch { - $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); - }; + my $biblio_chunks = natatime $at_a_time, @bib_records; + while ( ( my @bib_chunk = $biblio_chunks->() ) ) { + try { + $biblio_indexer->update_index( \@bib_chunk ); + } catch { + $logger->warn( sprintf "Update of elastic index failed with: %s", $_ ); + }; + } } # Finish -- 2.39.5