Browse Source

Bug 19893: Remove serialization format setting

Default to base64-encoded binary MARC, with MARCXML as a
fallback if the record exceeds the maximum size

Sponsored-by: Gothenburg University Library
Signed-off-by: Ere Maijala <ere.maijala@helsinki.fi>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
18.11.x
David Gustafsson 3 years ago
committed by Nick Clemens
parent
commit
fc0abee558
  1. 19
      Koha/SearchEngine/Elasticsearch.pm
  2. 7
      Koha/SearchEngine/Elasticsearch/Search.pm
  3. 6
      admin/searchengine/elasticsearch/field_config.yaml
  4. 8
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref

19
Koha/SearchEngine/Elasticsearch.pm

@ -379,11 +379,26 @@ sub marc_records_to_documents {
}
# TODO: Perhaps we should check that $record_document is non-empty, but it should really never be empty
$record->encoding('UTF-8');
if ($serialization_format eq 'base64ISO2709') {
my @warnings;
{
# Temporarily intercept all warn signals (MARC::Record carps when record length > 99999)
local $SIG{__WARN__} = sub {
push @warnings, $_[0];
};
$record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc()));
}
else {
if (@warnings) {
# Suppress warnings if record length exceeded
unless (substr($record->leader(), 0, 5) eq '99999') {
foreach my $warning (@warnings) {
carp($warning);
}
}
$record_document->{'marc_data'} = $record->as_xml_record($marcflavour);
$record_document->{'marc_format'} = 'MARCXML';
}
else {
$record_document->{'marc_format'} = 'base64ISO2709';
}
my $id = $record->subfield('999', 'c');
push @record_documents, [$id, $record_document];

7
Koha/SearchEngine/Elasticsearch/Search.pm

@ -369,11 +369,14 @@ sub decode_record_from_result {
# Result is passed in as array, will get flattened
# and first element will be $result
my ( $self, $result ) = @_;
if (C4::Context->preference('ElasticsearchMARCSerializationFormat') eq 'MARCXML') {
if ($result->{marc_format} eq 'base64ISO2709') {
return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
}
elsif ($result->{marc_format} eq 'MARCXML') {
return MARC::Record->new_from_xml($result->{marc_data}, 'UTF-8', uc C4::Context->preference('marcflavour'));
}
else {
return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
die("Missing marc_format field in Elasticsearch result");
}
}

6
admin/searchengine/elasticsearch/field_config.yaml

@ -10,6 +10,12 @@ general:
type: text
analyzer: keyword
index: false
marc_format:
store: true
type: text
analyzer: keyword
index: false
# Search fields
search:
boolean:

8
koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref

@ -427,11 +427,3 @@ Administration:
choices:
Zebra: Zebra
Elasticsearch: Elasticsearch
-
- "Use"
- pref: ElasticsearchMARCSerializationFormat
default: MARCXML
choices:
MARCXML: MARCXML
base64ISO2709: base64ISO2709
- "as serialization format for MARC records stored in Elasticsearch index. base64ISO2709 is faster and will use less space but have a maximum record length which could cause issues with very large records."
Loading…
Cancel
Save