From 1d29d0e4497eedfdc0515a636ed65c2be68ba081 Mon Sep 17 00:00:00 2001 From: David Gustafsson Date: Mon, 22 Oct 2018 11:30:17 +0200 Subject: [PATCH] Bug 19893: Add pods, remove syspref, add tests for serialization format Add missing pods, remove obsolete syspref and add test for serialization format for records exceeding max record size Sponsored-by: Gothenburg University Library Signed-off-by: Ere Maijala Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens --- Koha/SearchEngine/Elasticsearch.pm | 299 +++++++++++++------ Koha/SearchEngine/Elasticsearch/Indexer.pm | 210 ++++++++++--- admin/searchengine/elasticsearch/mappings.pl | 6 +- misc/search_tools/rebuild_elastic_search.pl | 6 +- t/Koha/SearchEngine/Elasticsearch.t | 27 +- 5 files changed, 405 insertions(+), 143 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index bd1c5e0f6f..1c5fc29d4b 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -73,6 +73,15 @@ sub new { return $self; } +=head2 get_elasticsearch + + my $elasticsearch_client = $self->get_elasticsearch(); + +Returns a C client. The client is cached on a C +instance level and will be reused if method is called multiple times. + +=cut + sub get_elasticsearch { my $self = shift @_; unless (defined $self->{elasticsearch}) { @@ -145,8 +154,8 @@ sub get_elasticsearch_params { my $settings = $self->get_elasticsearch_settings(); -This provides the settings provided to elasticsearch when an index is created. -These can do things like define tokenisation methods. +This provides the settings provided to Elasticsearch when an index is created. +These can do things like define tokenization methods. A hashref containing the settings is returned. @@ -170,7 +179,7 @@ sub get_elasticsearch_settings { my $mappings = $self->get_elasticsearch_mappings(); -This provides the mappings that get passed to elasticsearch when an index is +This provides the mappings that get passed to Elasticsearch when an index is created. =cut @@ -232,7 +241,7 @@ sub get_elasticsearch_mappings { =head2 _get_elasticsearch_mapping -Get the ES mappings for the given purpose and data type +Get the Elasticsearch mappings for the given purpose and data type. $mapping = _get_elasticsearch_mapping('search', 'text'); @@ -301,51 +310,96 @@ sub sort_fields { return $self->_sort_fields_accessor(); } +=head2 _process_mappings($mappings, $data, $record_document) + +Process all C<$mappings> targets operating on a specific MARC field C<$data> applied to C<$record_document> +Since we group all mappings by MARC field targets C<$mappings> will contain all targets for C<$data> +and thus we need to fetch the MARC field only once. + +=over 4 + +=item C<$mappings> + +Arrayref of mappings containing arrayrefs on the format [C<$taget>, C<$options>] where +C<$target> is the name of the target field and C<$options> is a hashref containing processing +directives for this particular mapping. + +=item C<$data> + +The source data from a MARC record field. + +=item C<$record_document> + +Hashref representing the Elasticsearch document on which mappings should be applied. 
+ +=back + +=cut + +sub _process_mappings { + my ($_self, $mappings, $data, $record_document) = @_; + foreach my $mapping (@{$mappings}) { + my ($target, $options) = @{$mapping}; + # Copy (scalar) data since can have multiple targets + # with differing options for (possibly) mutating data + # so need a different copy for each + my $_data = $data; + $record_document->{$target} //= []; + if (defined $options->{substr}) { + my ($start, $length) = @{$options->{substr}}; + $_data = length($data) > $start ? substr $data, $start, $length : ''; + } + if (defined $options->{value_callbacks}) { + $_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}}); + } + if (defined $options->{property}) { + $_data = { + $options->{property} => $_data + } + } + push @{$record_document->{$target}}, $_data; + } +} + +=head2 marc_records_to_documents($marc_records) + + my @record_documents = $self->marc_records_to_documents($marc_records); + +Using mappings stored in database convert C<$marc_records> to Elasticsearch documents. + +Returns array of hash references, representing Elasticsearch documents, +acceptable as body payload in C requests. + +=over 4 + +=item C<$marc_documents> + +Reference to array of C objects to be converted to Elasticsearch documents. + +=back + +=cut + sub marc_records_to_documents { my ($self, $records) = @_; - my $rules = $self->get_marc_mapping_rules(); + my $rules = $self->_get_marc_mapping_rules(); my $control_fields_rules = $rules->{control_fields}; my $data_fields_rules = $rules->{data_fields}; my $marcflavour = lc C4::Context->preference('marcflavour'); - my $serialization_format = C4::Context->preference('ElasticsearchMARCSerializationFormat'); my @record_documents; - sub _process_mappings { - my ($mappings, $data, $record_document) = @_; - foreach my $mapping (@{$mappings}) { - my ($target, $options) = @{$mapping}; - # Copy (scalar) data since can have multiple targets - # with differing options for (possibly) mutating data - # so need a different copy for each - my $_data = $data; - $record_document->{$target} //= []; - if (defined $options->{substr}) { - my ($start, $length) = @{$options->{substr}}; - $_data = length($data) > $start ? 
substr $data, $start, $length : ''; - } - if (defined $options->{value_callbacks}) { - $_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}}); - } - if (defined $options->{property}) { - $_data = { - $options->{property} => $_data - } - } - push @{$record_document->{$target}}, $_data; - } - } foreach my $record (@{$records}) { my $record_document = {}; my $mappings = $rules->{leader}; if ($mappings) { - _process_mappings($mappings, $record->leader(), $record_document); + $self->_process_mappings($mappings, $record->leader(), $record_document); } foreach my $field ($record->fields()) { if($field->is_control_field()) { my $mappings = $control_fields_rules->{$field->tag()}; if ($mappings) { - _process_mappings($mappings, $field->data(), $record_document); + $self->_process_mappings($mappings, $field->data(), $record_document); } } else { @@ -361,7 +415,7 @@ sub marc_records_to_documents { $mappings = [@{$mappings}, @{$wildcard_mappings}]; } if (@{$mappings}) { - _process_mappings($mappings, $data, $record_document); + $self->_process_mappings($mappings, $data, $record_document); } } @@ -377,7 +431,7 @@ sub marc_records_to_documents { ) ); if ($data) { - _process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document); + $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document); } } } @@ -426,63 +480,134 @@ sub marc_records_to_documents { return \@record_documents; } -# Provides the rules for marc to Elasticsearch JSON document conversion. -sub get_marc_mapping_rules { - my ($self) = @_; +=head2 _field_mappings($facet, $suggestible, $sort, $target_name, $target_type, $range) - my $marcflavour = lc C4::Context->preference('marcflavour'); - my @rules; +Get mappings, an internal data structure later used by L<_process_mappings($mappings, $data, $record_document)> +to process MARC target data, for a MARC mapping. + +The returned C<$mappings> is not to be confused with mappings provided by C<_foreach_mapping>; rather, this +sub accepts properties from a mapping as provided by C<_foreach_mapping> and expands it to this internal +data structure. In the caller context (C<_get_marc_mapping_rules>) the returned C<@mappings> is then +applied to each MARC target (leader, control field data, subfield or joined subfields) and +integrated into the mapping rules data structure used in C<marc_records_to_documents> to +transform MARC records into Elasticsearch documents. + +=over 4 - sub _field_mappings { - my ($facet, $suggestible, $sort, $target_name, $target_type, $range) = @_; - my %mapping_defaults = (); - my @mappings; - - my $substr_args = undef; - if ($range) { - # TODO: use value_callback instead? - my ($start, $end) = map(int, split /-/, $range, 2); - $substr_args = [$start]; - push @{$substr_args}, (defined $end ? $end - $start + 1 : 1); +=item C<$facet> + +Boolean indicating whether to create a facet field for this mapping. + +=item C<$suggestible> + +Boolean indicating whether to create a suggestion field for this mapping. + +=item C<$sort> + +Boolean indicating whether to create a sort field for this mapping. + +=item C<$target_name> + +Elasticsearch document target field name. + +=item C<$target_type> + +Elasticsearch document target field type. + +=item C<$range> + +An optional range as a string in the format "<START>-<END>" or "<START>", +where "<START>" and "<END>" are integers specifying a range that will be used +for extracting a substring from MARC data as Elasticsearch field target value.
+ +The first character position is "1", and the range is inclusive, +so "1-3" means the first three characters of MARC data. + +If only "<START>" is provided only one character at position "<START>" will +be extracted. + +=back + +=cut + +sub _field_mappings { + my ($_self, $facet, $suggestible, $sort, $target_name, $target_type, $range) = @_; + my %mapping_defaults = (); + my @mappings; + + my $substr_args = undef; + if ($range) { + # TODO: use value_callback instead? + my ($start, $end) = map(int, split /-/, $range, 2); + $substr_args = [$start]; + push @{$substr_args}, (defined $end ? $end - $start + 1 : 1); + } + my $default_options = {}; + if ($substr_args) { + $default_options->{substr} = $substr_args; + } + + # TODO: Should probably have per type value callback/hook + # but hard code for now + if ($target_type eq 'boolean') { + $default_options->{value_callbacks} //= []; + push @{$default_options->{value_callbacks}}, sub { + my ($value) = @_; + # Trim whitespace at both ends + $value =~ s/^\s+|\s+$//g; + return $value ? 'true' : 'false'; + }; + } + + my $mapping = [$target_name, $default_options]; + push @mappings, $mapping; + + my @suffixes = (); + push @suffixes, 'facet' if $facet; + push @suffixes, 'suggestion' if $suggestible; + push @suffixes, 'sort' if !defined $sort || $sort; + + foreach my $suffix (@suffixes) { + my $mapping = ["${target_name}__$suffix"]; + # TODO: Hack, fix later in less hideous manner + if ($suffix eq 'suggestion') { + push @{$mapping}, {%{$default_options}, property => 'input'}; } - my $default_options = {}; - if ($substr_args) { - $default_options->{substr} = $substr_args; + else { + push @{$mapping}, $default_options; } + push @mappings, $mapping; + } + return @mappings; +}; - # TODO: Should probably have per type value callback/hook - # but hard code for now - if ($target_type eq 'boolean') { - $default_options->{value_callbacks} //= []; - push @{$default_options->{value_callbacks}}, sub { - my ($value) = @_; - # Trim whitespace at both ends - $value =~ s/^\s+|\s+$//g; - return $value ? 'true' : 'false'; - }; - } +=head2 _get_marc_mapping_rules - my $mapping = [$target_name, $default_options]; - push @mappings, $mapping; + my $mapping_rules = $self->_get_marc_mapping_rules() - my @suffixes = (); - push @suffixes, 'facet' if $facet; - push @suffixes, 'suggestion' if $suggestible; - push @suffixes, 'sort' if !defined $sort || $sort; +Generates rules from mappings stored in the database for MARC records to Elasticsearch JSON document conversion. + +Since field retrieval is slow in C<MARC::Records> (all fields are iterated through for +each call to C<MARC::Record>->field) we create an optimized structure of mapping +rules keyed by MARC field tags holding all the mapping rules for that particular tag. + +We can then iterate through all MARC fields for each record and apply all relevant +rules once per field instead of retrieving fields multiple times for each mapping rule, +which is terribly slow. + +=cut + +# TODO: This structure can be used for processing multiple MARC::Records so is currently +# rebuilt for each batch. Since it is cacheable it could also be stored in an in +# memory cache which it is currently not. The performance gain of caching +# would probably be marginal, but to do this could be a further improvement.
+ +sub _get_marc_mapping_rules { + my ($self) = @_; + + my $marcflavour = lc C4::Context->preference('marcflavour'); + my @rules; - foreach my $suffix (@suffixes) { - my $mapping = ["${target_name}__$suffix"]; - # Hack, fix later in less hideous manner - if ($suffix eq 'suggestion') { - push @{$mapping}, {%{$default_options}, property => 'input'}; - } - else { - push @{$mapping}, $default_options; - } - push @mappings, $mapping; - } - return @mappings; - }; my $field_spec_regexp = qr/^([0-9]{3})([()0-9a-z]+)?(?:_\/(\d+(?:-\d+)?))?$/; my $leader_regexp = qr/^leader(?:_\/(\d+(?:-\d+)?))?$/; my $rules = { @@ -494,7 +619,7 @@ sub get_marc_mapping_rules { }; $self->_foreach_mapping(sub { - my ( $name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field ) = @_; + my ($name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field) = @_; return if $marc_type ne $marcflavour; if ($type eq 'sum') { @@ -550,7 +675,7 @@ sub get_marc_mapping_rules { } my $range = defined $3 ? $3 : undef; - my @mappings = _field_mappings($facet, $suggestible, $sort, $name, $type, $range); + my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range); if ($field_tag < 10) { $rules->{control_fields}->{$field_tag} //= []; @@ -570,11 +695,11 @@ sub get_marc_mapping_rules { } elsif ($marc_field =~ $leader_regexp) { my $range = defined $1 ? $1 : undef; - my @mappings = _field_mappings($facet, $suggestible, $sort, $name, $type, $range); + my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range); push @{$rules->{leader}}, @mappings; } else { - die("Invalid marc field: $marc_field"); + die("Invalid MARC field: $marc_field"); } }); return $rules; diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm index a6bd323d30..0bc68f6435 100644 --- a/Koha/SearchEngine/Elasticsearch/Indexer.pm +++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm @@ -44,45 +44,76 @@ Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the i $indexer->drop_index(); $indexer->update_index(\@biblionumbers, \@records); -=head1 FUNCTIONS -=head2 $indexer->update_index($biblionums, $records); +=head1 CONSTANTS -C<$biblionums> is an arrayref containing the biblionumbers for the records. +=over 4 -C<$records> is an arrayref containing the Ls themselves. +=item C<INDEX_STATUS_OK> -The values in the arrays must match up, and the 999$c value in the MARC record -will be rewritten using the values in C<$biblionums> to ensure they are correct. -If C<$biblionums> is C, this won't happen, but you should be sure that -999$c is correct on your own then. +Represents an index state where the index has been created and is in a working state. -Note that this will modify the original record if C<$biblionums> is supplied. -If that's a problem, clone them first. +=item C<INDEX_STATUS_REINDEX_REQUIRED> + +Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added. + +=item C<INDEX_STATUS_RECREATE_REQUIRED> + +Represents an index state where the index needs to be recreated and is not in a working state. + +=back =cut use constant { INDEX_STATUS_OK => 0, - INDEX_STATUS_REINDEX_REQUIRED => 1, # Not currently used, but could be useful later, for example if can detect when new field or mapping added + INDEX_STATUS_REINDEX_REQUIRED => 1, INDEX_STATUS_RECREATE_REQUIRED => 2, }; +=head1 FUNCTIONS + +=head2 update_index($biblionums, $records) + + try { + $self->update_index($biblionums, $records); + } catch { + die("Something went wrong trying to update index:" .
$_[0]); + } + +Converts C C<$records> to Elasticsearch documents and performs +an update request for these records on the Elasticsearch index. + +The values in the arrays must match up, and the 999$c value in the MARC record +will be rewritten using the values in C<$biblionums> to ensure they are correct. +If C<$biblionums> is C, this won't happen, so in that case you should make +sure that 999$c is correct. + +Note that this will modify the original record if C<$biblionums> is supplied. +If that's a problem, clone them first. + +=over 4 + +=item C<$biblionums> + +Arrayref of biblio numbers for the C<$records>, the order must be the same as +and match up with C<$records>. + +=item C<$records> + +Arrayref of Cs. + +=back + +=cut + sub update_index { my ($self, $biblionums, $records) = @_; - # TODO should have a separate path for dealing with a large number - # of records at once where we use the bulk update functions in ES. if ($biblionums) { $self->_sanitise_records($biblionums, $records); } - $self->bulk_index($records); - return 1; -} - -sub bulk_index { - my ($self, $records) = @_; my $conf = $self->get_elasticsearch_params(); my $elasticsearch = $self->get_elasticsearch(); my $documents = $self->marc_records_to_documents($records); @@ -108,27 +139,88 @@ sub bulk_index { return 1; } -sub index_status_ok { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_OK) : - $self->index_status == INDEX_STATUS_OK; +=head2 set_index_status_ok + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_ok { + my ($self) = @_; + $self->index_status(INDEX_STATUS_OK); } -sub index_status_reindex_required { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_REINDEX_REQUIRED) : - $self->index_status == INDEX_STATUS_REINDEX_REQUIRED; +=head2 is_index_status_ok + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_ok { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_OK; } -sub index_status_recreate_required { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_RECREATE_REQUIRED) : - $self->index_status == INDEX_STATUS_RECREATE_REQUIRED; +=head2 set_index_status_reindex_required + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_reindex_required { + my ($self) = @_; + $self->index_status(INDEX_STATUS_REINDEX_REQUIRED); } +=head2 is_index_status_reindex_required + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_reindex_required { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_REINDEX_REQUIRED; +} + +=head2 set_index_status_recreate_required + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_recreate_required { + my ($self) = @_; + $self->index_status(INDEX_STATUS_RECREATE_REQUIRED); +} + +=head2 is_index_status_recreate_required + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_recreate_required { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_RECREATE_REQUIRED; +} + +=head2 index_status($status) + +Will either set the current index status to C<$status> and return C<$status>, +or return the current index status if called with no arguments. + +=over 4 + +=item C<$status> + +Optional argument. If passed will set current index status to C<$status> if C<$status> is +a valid status. See L. 
+ +=back + +=cut + sub index_status { my ($self, $status) = @_; my $key = 'ElasticsearchIndexStatus_' . $self->index; @@ -150,6 +242,15 @@ sub index_status { } } +=head2 update_mappings + +Generate Elasticsearch mappings from mappings stored in database and +perform a request to update Elasticsearch index mappings. Will throw an +error and set index status to C if update +failes. + +=cut + sub update_mappings { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); @@ -166,35 +267,35 @@ sub update_mappings { } ); } catch { - $self->index_status_recreate_required(1); + $self->set_index_status_recreate_required(); my $reason = $_[0]->{vars}->{body}->{error}->{reason}; Koha::Exceptions::Exception->throw( error => "Unable to update mappings for index \"$conf->{index_name}\". Reason was: \"$reason\". Index needs to be recreated and reindexed", ); }; } - $self->index_status_ok(1); + $self->set_index_status_ok(); } -=head2 $indexer->update_index_background($biblionums, $records) +=head2 update_index_background($biblionums, $records) -This has exactly the same API as C however it'll +This has exactly the same API as C however it'll return immediately. It'll start a background process that does the adding. If it fails to add to Elasticsearch then it'll add to a queue that will cause it to be updated by a regular index cron job in the future. +=cut + # TODO implement in the future - I don't know the best way of doing this yet. # If fork: make sure process group is changed so apache doesn't wait for us. -=cut - sub update_index_background { my $self = shift; $self->update_index(@_); } -=head2 $indexer->delete_index($biblionums) +=head2 delete_index($biblionums) C<$biblionums> is an arrayref of biblionumbers to delete from the index. @@ -217,23 +318,21 @@ sub delete_index { $self->store->bag->commit; } -=head2 $indexer->delete_index_background($biblionums) +=head2 delete_index_background($biblionums) -Identical to L, this will return immediately and start a -background process to do the actual deleting. +Identical to L =cut -# TODO implement in the future - +# TODO: Should be made async sub delete_index_background { my $self = shift; $self->delete_index(@_); } -=head2 $indexer->drop_index(); +=head2 drop_index -Drops the index from the elasticsearch server. +Drops the index from the Elasticsearch server. =cut @@ -243,10 +342,16 @@ sub drop_index { my $conf = $self->get_elasticsearch_params(); my $elasticsearch = $self->get_elasticsearch(); $elasticsearch->indices->delete(index => $conf->{index_name}); - $self->index_status_recreate_required(1); + $self->set_index_status_recreate_required(); } } +=head2 create_index + +Creates the index (including mappings) on the Elasticsearch server. + +=cut + sub create_index { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); @@ -261,6 +366,13 @@ sub create_index { $self->update_mappings(); } +=head2 index_exists + +Checks if index has been created on the Elasticsearch server. Returns C<1> or the +empty string to indicate whether index exists or not. 
+ +=cut + sub index_exists { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); diff --git a/admin/searchengine/elasticsearch/mappings.pl b/admin/searchengine/elasticsearch/mappings.pl index d5fef83c38..a2c1f66580 100755 --- a/admin/searchengine/elasticsearch/mappings.pl +++ b/admin/searchengine/elasticsearch/mappings.pl @@ -150,16 +150,16 @@ my @indexes; for my $index_name (@index_names) { my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new({ index => $index_name }); - if (!$indexer->index_status_ok) { + if (!$indexer->is_index_status_ok) { my $conf = $indexer->get_elasticsearch_params(); - if ($indexer->index_status_reindex_required) { + if ($indexer->is_index_status_reindex_required) { push @messages, { type => 'error', code => 'reindex_required', index => $conf->{index_name}, }; } - elsif($indexer->index_status_recreate_required) { + elsif($indexer->is_index_status_recreate_required) { push @messages, { type => 'error', code => 'recreate_required', diff --git a/misc/search_tools/rebuild_elastic_search.pl b/misc/search_tools/rebuild_elastic_search.pl index bed6b5197c..caa9a3d870 100755 --- a/misc/search_tools/rebuild_elastic_search.pl +++ b/misc/search_tools/rebuild_elastic_search.pl @@ -165,13 +165,13 @@ sub do_reindex { $indexer->drop_index() if $indexer->index_exists(); $indexer->create_index(); } - elsif (!$indexer->index_exists()) { + elsif (!$indexer->index_exists) { # Create index if does not exist $indexer->create_index(); - } elsif ($indexer->index_status_ok) { + } elsif ($indexer->is_index_status_ok) { # Update mapping unless index is some kind of problematic state $indexer->update_mappings(); - } elsif ($indexer->index_status_recreate_required) { + } elsif ($indexer->is_index_status_recreate_required) { warn qq/Index "$index_name" has status "recreate required", suggesting it should be recreated/; } diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index f4fe4355fa..323a3b0106 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -115,7 +115,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub { subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub { - plan tests => 30; + plan tests => 32; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); @@ -315,6 +315,8 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' ok(defined $docs->[0][1]->{marc_format}, 'First document marc_format field should be set'); + is($docs->[0][1]->{marc_format}, 'base64ISO2709', 'First document marc_format should be set correctly'); + is(scalar @{$docs->[0][1]->{type_of_record}}, 1, 'First document type_of_record field should have one value'); is_deeply( $docs->[0][1]->{type_of_record}, @@ -351,4 +353,27 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' ok(!(defined $docs->[0][1]->{unimarc_title}), "No mapping when marc_type doesn't match marc flavour"); + # Marc serialization format fallback for records exceeding ISO2709 max record size + + my $large_marc_record = MARC::Record->new(); + $large_marc_record->leader(' cam 22 a 4500'); + + $large_marc_record->append_fields( + MARC::Field->new('100', '', '', a => 'Author 1'), + MARC::Field->new('110', '', '', a => 'Corp Author'), + MARC::Field->new('210', '', '', a => 'Title 1'), + MARC::Field->new('245', '', '', a => 'Title:', b => 'large record'), + MARC::Field->new('999', '', '', c => '1234567'), + ); + + my $item_field = MARC::Field->new('952', '', '', o 
=> '123456789123456789123456789', p => '123456789', z => 'test'); + my $items_count = 1638; + while(--$items_count) { + $large_marc_record->append_fields($item_field); + } + + $docs = $see->marc_records_to_documents([$large_marc_record]); + + is($docs->[0][1]->{marc_format}, 'MARCXML', 'For record exceeding max record size marc_format should be set correctly'); + }; -- 2.39.5
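
For reference, here is a minimal standalone sketch (not part of the patch) of how the per-target options handled by _process_mappings above (substr, value_callbacks, property) transform a single piece of MARC field data into Elasticsearch document fields. The target names and sample control field data are illustrative only.

    #!/usr/bin/perl
    # Sketch: mimics the option handling in Koha::SearchEngine::Elasticsearch::_process_mappings.
    # Target names and sample data are made up for illustration.
    use strict;
    use warnings;
    use List::Util qw(reduce);
    use Data::Dumper;

    my $mappings = [
        # [ target_name, options ]
        [ 'local-number',        {} ],                      # copy value as-is
        [ 'date-of-publication', { substr => [ 7, 4 ] } ],  # 4 characters starting at offset 7
        [ 'title__suggestion',   { property => 'input' } ], # wrap value for a completion field
    ];

    my $data            = '140211b2014    xxu||||| |||| 00| 0 eng d'; # sample 008-like data
    my $record_document = {};

    foreach my $mapping (@{$mappings}) {
        my ($target, $options) = @{$mapping};
        my $_data = $data;                     # copy, each target may mutate its own value
        $record_document->{$target} //= [];
        if (defined $options->{substr}) {
            my ($start, $length) = @{ $options->{substr} };
            $_data = length($data) > $start ? substr $data, $start, $length : '';
        }
        if (defined $options->{value_callbacks}) {
            $_data = reduce { $b->($a) } ( $_data, @{ $options->{value_callbacks} } );
        }
        if (defined $options->{property}) {
            $_data = { $options->{property} => $_data };
        }
        push @{ $record_document->{$target} }, $_data;
    }

    print Dumper($record_document);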
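Similarly, a hedged sketch of how a caller might combine the index status helpers introduced in Indexer.pm (index_exists, is_index_status_*, update_mappings); it mirrors the logic in rebuild_elastic_search.pl above. The index name 'biblios' is an example only.

    # Sketch: using the renamed index status helpers from
    # Koha::SearchEngine::Elasticsearch::Indexer (mirrors do_reindex() in
    # rebuild_elastic_search.pl).
    use Koha::SearchEngine::Elasticsearch::Indexer;

    my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new({ index => 'biblios' });

    if (!$indexer->index_exists) {
        # No index yet: create it (create_index also pushes the mappings)
        $indexer->create_index();
    }
    elsif ($indexer->is_index_status_recreate_required) {
        # A failed mappings update flagged the index as unusable
        warn "Index must be dropped and recreated before reindexing";
    }
    elsif ($indexer->is_index_status_ok) {
        # Index is healthy: safe to push mapping changes from the database
        $indexer->update_mappings();
    }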