Browse Source

Bug 28268: Improve memory usage when indexing authorities in Elasticsearch

Retrieves the complete records one by one to avoid huge memory usage.

Note that this removes the call to GuessAuthTypeCode, but that call is still made later in Koha::SearchEngine::Elasticsearch::marc_records_to_documents (and it was never made when indexing a single record with the --authid parameter).

Test plan:
1. Apply patch
2. Reindex authorities: perl misc/search_tools/rebuild_elasticsearch.pl -a -d -v
3. Check that indexing completed successfully and results are correct.

Signed-off-by: Aleisha Amohia <aleishaamohia@hotmail.com>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
21.05.x
Ere Maijala 9 months ago
committed by Jonathan Druart
parent
commit
7e14b054b2
  1. 46
      Koha/MetadataRecord/Authority.pm

46
Koha/MetadataRecord/Authority.pm

@ -191,7 +191,7 @@ sub get_all_authorities_iterator {
};
}
my $search_options->{columns} = [qw/ authid authtypecode marcxml /];
my $search_options->{columns} = [qw/ authid /];
if ($options{desc}) {
$search_options->{order_by} = { -desc => 'authid' };
}
@ -203,36 +203,20 @@ sub get_all_authorities_iterator {
$search_terms,
$search_options);
my $next_func = sub {
my $row = $rs->next();
return if !$row;
my $authid = $row->authid;
my $authtypecode = $row->authtypecode;
my $marcxml = $row->marcxml;
my $record = eval {
MARC::Record->new_from_xml(
StripNonXmlChars($marcxml),
'UTF-8',
(
C4::Context->preference("marcflavour") eq "UNIMARC"
? "UNIMARCAUTH"
: C4::Context->preference("marcflavour")
)
);
};
confess "$@" if ($@);
$record->encoding('UTF-8');
# I'm not sure why we don't use the authtypecode from the database,
# but this is how the original code does it.
require C4::AuthoritiesMarc;
$authtypecode = C4::AuthoritiesMarc::GuessAuthTypeCode($record);
my $auth = __PACKAGE__->new( $record, { authid => $authid, id => $authid, authtypecode => $authtypecode } );
return $auth;
};
return Koha::MetadataIterator->new($next_func);
# Return the next authority that can be loaded, or nothing once the
# result set is exhausted. Records that fail to load are reported with
# a warning and skipped; returning early on a bad record would instead
# look like end-of-data to the caller and break the loop.
while (1) {
    my $row = $rs->next();
    return if !$row;

    # Capture the id in a plain scalar: a method call such as
    # "$row->authid" is NOT interpolated inside a double-quoted string
    # (only $row would be stringified), so the warning below would
    # otherwise never show the real authority id.
    my $authid = $row->authid;

    # Fetch the complete record one at a time to keep memory usage low.
    my $auth = __PACKAGE__->get_from_authid($authid);
    if (!$auth) {
        # NOTE(review): $@ is only meaningful if get_from_authid leaves
        # its eval error in place on failure -- confirm against its
        # implementation.
        warn "Something went wrong reading record for authority $authid: $@\n";
        next;
    }
    return $auth;
}
};
return Koha::MetadataIterator->new($next_func);
}
1;

Loading…
Cancel
Save