From fc0abee5586beb2d003c2d02842c90ca9bb29bd2 Mon Sep 17 00:00:00 2001 From: David Gustafsson Date: Mon, 28 May 2018 16:03:32 +0200 Subject: [PATCH] Bug 19893: Remove serialization format setting Default to base64 encoded binary MARC with MARCXML fallback if record exceeds maximum size Sponsored-by: Gothenburg University Library Signed-off-by: Ere Maijala Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens --- Koha/SearchEngine/Elasticsearch.pm | 19 +++++++++++++++++-- Koha/SearchEngine/Elasticsearch/Search.pm | 7 +++++-- .../elasticsearch/field_config.yaml | 6 ++++++ .../en/modules/admin/preferences/admin.pref | 8 -------- 4 files changed, 28 insertions(+), 12 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 93d9213dfb..32ba39bfdb 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -379,11 +379,26 @@ sub marc_records_to_documents { } # TODO: Perhaps should check if $records_document non empty, but really should never be the case $record->encoding('UTF-8'); - if ($serialization_format eq 'base64ISO2709') { + my @warnings; + { + # Temporarily intercept all warn signals (MARC::Record carps when record length > 99999) + local $SIG{__WARN__} = sub { + push @warnings, $_[0]; + }; $record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc())); } - else { + if (@warnings) { + # Suppress warnings if record length exceeded + unless (substr($record->leader(), 0, 5) eq '99999') { + foreach my $warning (@warnings) { + carp($warning); + } + } $record_document->{'marc_data'} = $record->as_xml_record($marcflavour); + $record_document->{'marc_format'} = 'MARCXML'; + } + else { + $record_document->{'marc_format'} = 'base64ISO2709'; } my $id = $record->subfield('999', 'c'); push @record_documents, [$id, $record_document]; diff --git a/Koha/SearchEngine/Elasticsearch/Search.pm b/Koha/SearchEngine/Elasticsearch/Search.pm index 3bd27eae0a..718eb063b0 100644 --- a/Koha/SearchEngine/Elasticsearch/Search.pm +++ b/Koha/SearchEngine/Elasticsearch/Search.pm @@ -369,11 +369,14 @@ sub decode_record_from_result { # Result is passed in as array, will get flattened # and first element will be $result my ( $self, $result ) = @_; - if (C4::Context->preference('ElasticsearchMARCSerializationFormat') eq 'MARCXML') { + if ($result->{marc_format} eq 'base64ISO2709') { + return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data})); + } + elsif ($result->{marc_format} eq 'MARCXML') { return MARC::Record->new_from_xml($result->{marc_data}, 'UTF-8', uc C4::Context->preference('marcflavour')); } else { - return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data})); + die("Missing marc_format field in Elasticsearch result"); } } diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml index 0d60cd87e6..0baddf3a02 100644 --- a/admin/searchengine/elasticsearch/field_config.yaml +++ b/admin/searchengine/elasticsearch/field_config.yaml @@ -10,6 +10,12 @@ general: type: text analyzer: keyword index: false + marc_format: + store: true + type: text + analyzer: keyword + index: false + # Search fields search: boolean: diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref index 95bd765b81..464c8c8e6a 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref @@ -427,11 +427,3 @@ Administration: choices: Zebra: Zebra Elasticsearch: Elasticsearch - - - - "Use" - - pref: ElasticsearchMARCSerializationFormat - default: MARCXML - choices: - MARCXML: MARCXML - base64ISO2709: base64ISO2709 - - "as serialization format for MARC records stored in Elasticsearch index. base64ISO2709 is faster and will use less space but have a maximum record length which could cause issues with very large records." -- 2.39.5