From bf63a2f6b005ebea03072b5745f216b40290fa46 Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Fri, 30 Apr 2021 10:00:23 +0300 Subject: [PATCH] Bug 28268: Improve memory usage when indexing authorities in Elasticsearch Retrieves the complete records one by one to avoid huge memory usage. Note that this removes the call to GuessAuthTypeCode, but it is done later in Koha::SearchEngine::Elasticsearch::marc_records_to_documents (and was never done if you asked to index a single record with --authid parameter). Test plan: 1. Apply patch 2. Reindex authorities: perl misc/search_tools/rebuild_elasticsearch.pl -a -d -v 3. Check that indexing completed successfully and results are correct. Signed-off-by: Aleisha Amohia Signed-off-by: Nick Clemens Signed-off-by: Jonathan Druart (cherry picked from commit 7e14b054b2fd5b6fddc2046b46d3bd431acf689a) Signed-off-by: Fridolin Somers --- Koha/MetadataRecord/Authority.pm | 46 +++++++++++--------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/Koha/MetadataRecord/Authority.pm b/Koha/MetadataRecord/Authority.pm index 47f2172976..31e5b53039 100644 --- a/Koha/MetadataRecord/Authority.pm +++ b/Koha/MetadataRecord/Authority.pm @@ -191,7 +191,7 @@ sub get_all_authorities_iterator { }; } - my $search_options->{columns} = [qw/ authid authtypecode marcxml /]; + my $search_options->{columns} = [qw/ authid /]; if ($options{desc}) { $search_options->{order_by} = { -desc => 'authid' }; } @@ -203,36 +203,20 @@ sub get_all_authorities_iterator { $search_terms, $search_options); my $next_func = sub { - my $row = $rs->next(); - return if !$row; - my $authid = $row->authid; - my $authtypecode = $row->authtypecode; - my $marcxml = $row->marcxml; - - my $record = eval { - MARC::Record->new_from_xml( - StripNonXmlChars($marcxml), - 'UTF-8', - ( - C4::Context->preference("marcflavour") eq "UNIMARC" - ? 
"UNIMARCAUTH" - : C4::Context->preference("marcflavour") - ) - ); - }; - confess "$@" if ($@); - $record->encoding('UTF-8'); - - # I'm not sure why we don't use the authtypecode from the database, - # but this is how the original code does it. - require C4::AuthoritiesMarc; - $authtypecode = C4::AuthoritiesMarc::GuessAuthTypeCode($record); - - my $auth = __PACKAGE__->new( $record, { authid => $authid, id => $authid, authtypecode => $authtypecode } ); - - return $auth; - }; - return Koha::MetadataIterator->new($next_func); + # Warn and skip bad records, otherwise we break the loop + while (1) { + my $row = $rs->next(); + return if !$row; + + my $auth = __PACKAGE__->get_from_authid($row->authid); + if (!$auth) { + warn "Something went wrong reading record for authority " . $row->authid . ": $@\n"; + next; + } + return $auth; + } + }; + return Koha::MetadataIterator->new($next_func); } 1; -- 2.39.5