From 1d29d0e4497eedfdc0515a636ed65c2be68ba081 Mon Sep 17 00:00:00 2001 From: David Gustafsson Date: Mon, 22 Oct 2018 11:30:17 +0200 Subject: [PATCH] Bug 19893: Add pods, remove syspref, add tests for serialization format Add missing pods, remove obsolete syspref and add test for serialization format for records exceeding max record size Sponsored-by: Gothenburg University Library Signed-off-by: Ere Maijala Signed-off-by: Martin Renvoize Signed-off-by: Nick Clemens --- Koha/SearchEngine/Elasticsearch.pm | 299 +++++++++++++------ Koha/SearchEngine/Elasticsearch/Indexer.pm | 210 ++++++++++--- admin/searchengine/elasticsearch/mappings.pl | 6 +- misc/search_tools/rebuild_elastic_search.pl | 6 +- t/Koha/SearchEngine/Elasticsearch.t | 27 +- 5 files changed, 405 insertions(+), 143 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index bd1c5e0f6f..1c5fc29d4b 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -73,6 +73,15 @@ sub new { return $self; } +=head2 get_elasticsearch + + my $elasticsearch_client = $self->get_elasticsearch(); + +Returns a C client. The client is cached on a C +instance level and will be reused if method is called multiple times. + +=cut + sub get_elasticsearch { my $self = shift @_; unless (defined $self->{elasticsearch}) { @@ -145,8 +154,8 @@ sub get_elasticsearch_params { my $settings = $self->get_elasticsearch_settings(); -This provides the settings provided to elasticsearch when an index is created. -These can do things like define tokenisation methods. +This provides the settings provided to Elasticsearch when an index is created. +These can do things like define tokenization methods. A hashref containing the settings is returned. @@ -170,7 +179,7 @@ sub get_elasticsearch_settings { my $mappings = $self->get_elasticsearch_mappings(); -This provides the mappings that get passed to elasticsearch when an index is +This provides the mappings that get passed to Elasticsearch when an index is created. =cut @@ -232,7 +241,7 @@ sub get_elasticsearch_mappings { =head2 _get_elasticsearch_mapping -Get the ES mappings for the given purpose and data type +Get the Elasticsearch mappings for the given purpose and data type. $mapping = _get_elasticsearch_mapping('search', 'text'); @@ -301,51 +310,96 @@ sub sort_fields { return $self->_sort_fields_accessor(); } +=head2 _process_mappings($mappings, $data, $record_document) + +Process all C<$mappings> targets operating on a specific MARC field C<$data> applied to C<$record_document> +Since we group all mappings by MARC field targets C<$mappings> will contain all targets for C<$data> +and thus we need to fetch the MARC field only once. + +=over 4 + +=item C<$mappings> + +Arrayref of mappings containing arrayrefs on the format [C<$taget>, C<$options>] where +C<$target> is the name of the target field and C<$options> is a hashref containing processing +directives for this particular mapping. + +=item C<$data> + +The source data from a MARC record field. + +=item C<$record_document> + +Hashref representing the Elasticsearch document on which mappings should be applied. 
+ +=back + +=cut + +sub _process_mappings { + my ($_self, $mappings, $data, $record_document) = @_; + foreach my $mapping (@{$mappings}) { + my ($target, $options) = @{$mapping}; + # Copy (scalar) data since can have multiple targets + # with differing options for (possibly) mutating data + # so need a different copy for each + my $_data = $data; + $record_document->{$target} //= []; + if (defined $options->{substr}) { + my ($start, $length) = @{$options->{substr}}; + $_data = length($data) > $start ? substr $data, $start, $length : ''; + } + if (defined $options->{value_callbacks}) { + $_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}}); + } + if (defined $options->{property}) { + $_data = { + $options->{property} => $_data + } + } + push @{$record_document->{$target}}, $_data; + } +} + +=head2 marc_records_to_documents($marc_records) + + my @record_documents = $self->marc_records_to_documents($marc_records); + +Using mappings stored in database convert C<$marc_records> to Elasticsearch documents. + +Returns array of hash references, representing Elasticsearch documents, +acceptable as body payload in C requests. + +=over 4 + +=item C<$marc_documents> + +Reference to array of C objects to be converted to Elasticsearch documents. + +=back + +=cut + sub marc_records_to_documents { my ($self, $records) = @_; - my $rules = $self->get_marc_mapping_rules(); + my $rules = $self->_get_marc_mapping_rules(); my $control_fields_rules = $rules->{control_fields}; my $data_fields_rules = $rules->{data_fields}; my $marcflavour = lc C4::Context->preference('marcflavour'); - my $serialization_format = C4::Context->preference('ElasticsearchMARCSerializationFormat'); my @record_documents; - sub _process_mappings { - my ($mappings, $data, $record_document) = @_; - foreach my $mapping (@{$mappings}) { - my ($target, $options) = @{$mapping}; - # Copy (scalar) data since can have multiple targets - # with differing options for (possibly) mutating data - # so need a different copy for each - my $_data = $data; - $record_document->{$target} //= []; - if (defined $options->{substr}) { - my ($start, $length) = @{$options->{substr}}; - $_data = length($data) > $start ? 
substr $data, $start, $length : ''; - } - if (defined $options->{value_callbacks}) { - $_data = reduce { $b->($a) } ($_data, @{$options->{value_callbacks}}); - } - if (defined $options->{property}) { - $_data = { - $options->{property} => $_data - } - } - push @{$record_document->{$target}}, $_data; - } - } foreach my $record (@{$records}) { my $record_document = {}; my $mappings = $rules->{leader}; if ($mappings) { - _process_mappings($mappings, $record->leader(), $record_document); + $self->_process_mappings($mappings, $record->leader(), $record_document); } foreach my $field ($record->fields()) { if($field->is_control_field()) { my $mappings = $control_fields_rules->{$field->tag()}; if ($mappings) { - _process_mappings($mappings, $field->data(), $record_document); + $self->_process_mappings($mappings, $field->data(), $record_document); } } else { @@ -361,7 +415,7 @@ sub marc_records_to_documents { $mappings = [@{$mappings}, @{$wildcard_mappings}]; } if (@{$mappings}) { - _process_mappings($mappings, $data, $record_document); + $self->_process_mappings($mappings, $data, $record_document); } } @@ -377,7 +431,7 @@ sub marc_records_to_documents { ) ); if ($data) { - _process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document); + $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document); } } } @@ -426,63 +480,134 @@ sub marc_records_to_documents { return \@record_documents; } -# Provides the rules for marc to Elasticsearch JSON document conversion. -sub get_marc_mapping_rules { - my ($self) = @_; +=head2 _field_mappings($facet, $suggestible, $sort, $target_name, $target_type, $range) - my $marcflavour = lc C4::Context->preference('marcflavour'); - my @rules; +Get mappings, an internal data structure later used by L<_process_mappings($mappings, $data, $record_document)> +to process MARC target data, for a MARC mapping. + +The returned C<$mappings> is not to be confused with mappings provided by C<_foreach_mapping>; rather, this +sub accepts properties from a mapping as provided by C<_foreach_mapping> and expands it to this internal +data structure. In the caller context (C<_get_marc_mapping_rules>) the returned C<@mappings> is then +applied to each MARC target (leader, control field data, subfield or joined subfields) and +integrated into the mapping rules data structure used in C<marc_records_to_documents> to +transform MARC records into Elasticsearch documents. + +=over 4 - sub _field_mappings { - my ($facet, $suggestible, $sort, $target_name, $target_type, $range) = @_; - my %mapping_defaults = (); - my @mappings; - - my $substr_args = undef; - if ($range) { - # TODO: use value_callback instead? - my ($start, $end) = map(int, split /-/, $range, 2); - $substr_args = [$start]; - push @{$substr_args}, (defined $end ? $end - $start + 1 : 1); +=item C<$facet> + +Boolean indicating whether to create a facet field for this mapping. + +=item C<$suggestible> + +Boolean indicating whether to create a suggestion field for this mapping. + +=item C<$sort> + +Boolean indicating whether to create a sort field for this mapping. + +=item C<$target_name> + +Elasticsearch document target field name. + +=item C<$target_type> + +Elasticsearch document target field type. + +=item C<$range> + +An optional range as a string in the format "<START>-<END>" or "<START>", +where "<START>" and "<END>" are integers specifying a range that will be used +for extracting a substring from MARC data as Elasticsearch field target value.
+ +The first character position is "1", and the range is inclusive, +so "1-3" means the first three characters of MARC data. + +If only "<START>" is provided only one character at position "<START>" will +be extracted. + +=back + +=cut + +sub _field_mappings { + my ($_self, $facet, $suggestible, $sort, $target_name, $target_type, $range) = @_; + my %mapping_defaults = (); + my @mappings; + + my $substr_args = undef; + if ($range) { + # TODO: use value_callback instead? + my ($start, $end) = map(int, split /-/, $range, 2); + $substr_args = [$start]; + push @{$substr_args}, (defined $end ? $end - $start + 1 : 1); + } + my $default_options = {}; + if ($substr_args) { + $default_options->{substr} = $substr_args; + } + + # TODO: Should probably have per type value callback/hook + # but hard code for now + if ($target_type eq 'boolean') { + $default_options->{value_callbacks} //= []; + push @{$default_options->{value_callbacks}}, sub { + my ($value) = @_; + # Trim whitespace at both ends + $value =~ s/^\s+|\s+$//g; + return $value ? 'true' : 'false'; + }; + } + + my $mapping = [$target_name, $default_options]; + push @mappings, $mapping; + + my @suffixes = (); + push @suffixes, 'facet' if $facet; + push @suffixes, 'suggestion' if $suggestible; + push @suffixes, 'sort' if !defined $sort || $sort; + + foreach my $suffix (@suffixes) { + my $mapping = ["${target_name}__$suffix"]; + # TODO: Hack, fix later in less hideous manner + if ($suffix eq 'suggestion') { + push @{$mapping}, {%{$default_options}, property => 'input'}; } - my $default_options = {}; - if ($substr_args) { - $default_options->{substr} = $substr_args; + else { + push @{$mapping}, $default_options; } + push @mappings, $mapping; + } + return @mappings; +}; - # TODO: Should probably have per type value callback/hook - # but hard code for now - if ($target_type eq 'boolean') { - $default_options->{value_callbacks} //= []; - push @{$default_options->{value_callbacks}}, sub { - my ($value) = @_; - # Trim whitespace at both ends - $value =~ s/^\s+|\s+$//g; - return $value ? 'true' : 'false'; - }; - } +=head2 _get_marc_mapping_rules - my $mapping = [$target_name, $default_options]; - push @mappings, $mapping; + my $mapping_rules = $self->_get_marc_mapping_rules() - my @suffixes = (); - push @suffixes, 'facet' if $facet; - push @suffixes, 'suggestion' if $suggestible; - push @suffixes, 'sort' if !defined $sort || $sort; +Generates rules from mappings stored in the database for MARC records to Elasticsearch JSON document conversion. + +Since field retrieval is slow in C<MARC::Records> (all fields are iterated through for +each call to C<MARC::Record>->field) we create an optimized structure of mapping +rules keyed by MARC field tags holding all the mapping rules for that particular tag. + +We can then iterate through all MARC fields for each record and apply all relevant +rules once per field instead of retrieving fields multiple times for each mapping rule, +which is terribly slow. + +=cut + +# TODO: This structure can be used for processing multiple MARC::Records so is currently +# rebuilt for each batch. Since it is cacheable it could also be stored in an in +# memory cache which it is currently not. The performance gain of caching +# would probably be marginal, but to do this could be a further improvement.
+ +sub _get_marc_mapping_rules { + my ($self) = @_; + + my $marcflavour = lc C4::Context->preference('marcflavour'); + my @rules; - foreach my $suffix (@suffixes) { - my $mapping = ["${target_name}__$suffix"]; - # Hack, fix later in less hideous manner - if ($suffix eq 'suggestion') { - push @{$mapping}, {%{$default_options}, property => 'input'}; - } - else { - push @{$mapping}, $default_options; - } - push @mappings, $mapping; - } - return @mappings; - }; my $field_spec_regexp = qr/^([0-9]{3})([()0-9a-z]+)?(?:_\/(\d+(?:-\d+)?))?$/; my $leader_regexp = qr/^leader(?:_\/(\d+(?:-\d+)?))?$/; my $rules = { @@ -494,7 +619,7 @@ sub get_marc_mapping_rules { }; $self->_foreach_mapping(sub { - my ( $name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field ) = @_; + my ($name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field) = @_; return if $marc_type ne $marcflavour; if ($type eq 'sum') { @@ -550,7 +675,7 @@ sub get_marc_mapping_rules { } my $range = defined $3 ? $3 : undef; - my @mappings = _field_mappings($facet, $suggestible, $sort, $name, $type, $range); + my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range); if ($field_tag < 10) { $rules->{control_fields}->{$field_tag} //= []; @@ -570,11 +695,11 @@ sub get_marc_mapping_rules { } elsif ($marc_field =~ $leader_regexp) { my $range = defined $1 ? $1 : undef; - my @mappings = _field_mappings($facet, $suggestible, $sort, $name, $type, $range); + my @mappings = $self->_field_mappings($facet, $suggestible, $sort, $name, $type, $range); push @{$rules->{leader}}, @mappings; } else { - die("Invalid marc field: $marc_field"); + die("Invalid MARC field: $marc_field"); } }); return $rules; diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm index a6bd323d30..0bc68f6435 100644 --- a/Koha/SearchEngine/Elasticsearch/Indexer.pm +++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm @@ -44,45 +44,76 @@ Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the i $indexer->drop_index(); $indexer->update_index(\@biblionumbers, \@records); -=head1 FUNCTIONS -=head2 $indexer->update_index($biblionums, $records); +=head1 CONSTANTS -C<$biblionums> is an arrayref containing the biblionumbers for the records. +=over 4 -C<$records> is an arrayref containing the Ls themselves. +=item C<INDEX_STATUS_OK> -The values in the arrays must match up, and the 999$c value in the MARC record -will be rewritten using the values in C<$biblionums> to ensure they are correct. -If C<$biblionums> is C, this won't happen, but you should be sure that -999$c is correct on your own then. +Represents an index state where the index has been created and is in a working state. -Note that this will modify the original record if C<$biblionums> is supplied. -If that's a problem, clone them first. +=item C<INDEX_STATUS_REINDEX_REQUIRED> + +Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added. + +=item C<INDEX_STATUS_RECREATE_REQUIRED> + +Represents an index state where the index needs to be recreated and is not in a working state. + +=back =cut use constant { INDEX_STATUS_OK => 0, - INDEX_STATUS_REINDEX_REQUIRED => 1, # Not currently used, but could be useful later, for example if can detect when new field or mapping added + INDEX_STATUS_REINDEX_REQUIRED => 1, INDEX_STATUS_RECREATE_REQUIRED => 2, }; +=head1 FUNCTIONS + +=head2 update_index($biblionums, $records) + + try { + $self->update_index($biblionums, $records); + } catch { + die("Something went wrong trying to update index:" .
$_[0]); + } + +Converts C C<$records> to Elasticsearch documents and performs +an update request for these records on the Elasticsearch index. + +The values in the arrays must match up, and the 999$c value in the MARC record +will be rewritten using the values in C<$biblionums> to ensure they are correct. +If C<$biblionums> is C, this won't happen, so in that case you should make +sure that 999$c is correct. + +Note that this will modify the original record if C<$biblionums> is supplied. +If that's a problem, clone them first. + +=over 4 + +=item C<$biblionums> + +Arrayref of biblio numbers for the C<$records>, the order must be the same as +and match up with C<$records>. + +=item C<$records> + +Arrayref of Cs. + +=back + +=cut + sub update_index { my ($self, $biblionums, $records) = @_; - # TODO should have a separate path for dealing with a large number - # of records at once where we use the bulk update functions in ES. if ($biblionums) { $self->_sanitise_records($biblionums, $records); } - $self->bulk_index($records); - return 1; -} - -sub bulk_index { - my ($self, $records) = @_; my $conf = $self->get_elasticsearch_params(); my $elasticsearch = $self->get_elasticsearch(); my $documents = $self->marc_records_to_documents($records); @@ -108,27 +139,88 @@ sub bulk_index { return 1; } -sub index_status_ok { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_OK) : - $self->index_status == INDEX_STATUS_OK; +=head2 set_index_status_ok + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_ok { + my ($self) = @_; + $self->index_status(INDEX_STATUS_OK); } -sub index_status_reindex_required { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_REINDEX_REQUIRED) : - $self->index_status == INDEX_STATUS_REINDEX_REQUIRED; +=head2 is_index_status_ok + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_ok { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_OK; } -sub index_status_recreate_required { - my ($self, $set) = @_; - return defined $set ? - $self->index_status(INDEX_STATUS_RECREATE_REQUIRED) : - $self->index_status == INDEX_STATUS_RECREATE_REQUIRED; +=head2 set_index_status_reindex_required + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_reindex_required { + my ($self) = @_; + $self->index_status(INDEX_STATUS_REINDEX_REQUIRED); } +=head2 is_index_status_reindex_required + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_reindex_required { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_REINDEX_REQUIRED; +} + +=head2 set_index_status_recreate_required + +Convenience method for setting index status to C. + +=cut + +sub set_index_status_recreate_required { + my ($self) = @_; + $self->index_status(INDEX_STATUS_RECREATE_REQUIRED); +} + +=head2 is_index_status_recreate_required + +Convenience method for checking if index status is C. + +=cut + +sub is_index_status_recreate_required { + my ($self) = @_; + return $self->index_status == INDEX_STATUS_RECREATE_REQUIRED; +} + +=head2 index_status($status) + +Will either set the current index status to C<$status> and return C<$status>, +or return the current index status if called with no arguments. + +=over 4 + +=item C<$status> + +Optional argument. If passed will set current index status to C<$status> if C<$status> is +a valid status. See L. 
+ +=back + +=cut + sub index_status { my ($self, $status) = @_; my $key = 'ElasticsearchIndexStatus_' . $self->index; @@ -150,6 +242,15 @@ sub index_status { } } +=head2 update_mappings + +Generate Elasticsearch mappings from mappings stored in database and +perform a request to update Elasticsearch index mappings. Will throw an +error and set index status to C if update +failes. + +=cut + sub update_mappings { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); @@ -166,35 +267,35 @@ sub update_mappings { } ); } catch { - $self->index_status_recreate_required(1); + $self->set_index_status_recreate_required(); my $reason = $_[0]->{vars}->{body}->{error}->{reason}; Koha::Exceptions::Exception->throw( error => "Unable to update mappings for index \"$conf->{index_name}\". Reason was: \"$reason\". Index needs to be recreated and reindexed", ); }; } - $self->index_status_ok(1); + $self->set_index_status_ok(); } -=head2 $indexer->update_index_background($biblionums, $records) +=head2 update_index_background($biblionums, $records) -This has exactly the same API as C however it'll +This has exactly the same API as C however it'll return immediately. It'll start a background process that does the adding. If it fails to add to Elasticsearch then it'll add to a queue that will cause it to be updated by a regular index cron job in the future. +=cut + # TODO implement in the future - I don't know the best way of doing this yet. # If fork: make sure process group is changed so apache doesn't wait for us. -=cut - sub update_index_background { my $self = shift; $self->update_index(@_); } -=head2 $indexer->delete_index($biblionums) +=head2 delete_index($biblionums) C<$biblionums> is an arrayref of biblionumbers to delete from the index. @@ -217,23 +318,21 @@ sub delete_index { $self->store->bag->commit; } -=head2 $indexer->delete_index_background($biblionums) +=head2 delete_index_background($biblionums) -Identical to L, this will return immediately and start a -background process to do the actual deleting. +Identical to L =cut -# TODO implement in the future - +# TODO: Should be made async sub delete_index_background { my $self = shift; $self->delete_index(@_); } -=head2 $indexer->drop_index(); +=head2 drop_index -Drops the index from the elasticsearch server. +Drops the index from the Elasticsearch server. =cut @@ -243,10 +342,16 @@ sub drop_index { my $conf = $self->get_elasticsearch_params(); my $elasticsearch = $self->get_elasticsearch(); $elasticsearch->indices->delete(index => $conf->{index_name}); - $self->index_status_recreate_required(1); + $self->set_index_status_recreate_required(); } } +=head2 create_index + +Creates the index (including mappings) on the Elasticsearch server. + +=cut + sub create_index { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); @@ -261,6 +366,13 @@ sub create_index { $self->update_mappings(); } +=head2 index_exists + +Checks if index has been created on the Elasticsearch server. Returns C<1> or the +empty string to indicate whether index exists or not. 
+ +=cut + sub index_exists { my ($self) = @_; my $conf = $self->get_elasticsearch_params(); diff --git a/admin/searchengine/elasticsearch/mappings.pl b/admin/searchengine/elasticsearch/mappings.pl index d5fef83c38..a2c1f66580 100755 --- a/admin/searchengine/elasticsearch/mappings.pl +++ b/admin/searchengine/elasticsearch/mappings.pl @@ -150,16 +150,16 @@ my @indexes; for my $index_name (@index_names) { my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new({ index => $index_name }); - if (!$indexer->index_status_ok) { + if (!$indexer->is_index_status_ok) { my $conf = $indexer->get_elasticsearch_params(); - if ($indexer->index_status_reindex_required) { + if ($indexer->is_index_status_reindex_required) { push @messages, { type => 'error', code => 'reindex_required', index => $conf->{index_name}, }; } - elsif($indexer->index_status_recreate_required) { + elsif($indexer->is_index_status_recreate_required) { push @messages, { type => 'error', code => 'recreate_required', diff --git a/misc/search_tools/rebuild_elastic_search.pl b/misc/search_tools/rebuild_elastic_search.pl index bed6b5197c..caa9a3d870 100755 --- a/misc/search_tools/rebuild_elastic_search.pl +++ b/misc/search_tools/rebuild_elastic_search.pl @@ -165,13 +165,13 @@ sub do_reindex { $indexer->drop_index() if $indexer->index_exists(); $indexer->create_index(); } - elsif (!$indexer->index_exists()) { + elsif (!$indexer->index_exists) { # Create index if does not exist $indexer->create_index(); - } elsif ($indexer->index_status_ok) { + } elsif ($indexer->is_index_status_ok) { # Update mapping unless index is some kind of problematic state $indexer->update_mappings(); - } elsif ($indexer->index_status_recreate_required) { + } elsif ($indexer->is_index_status_recreate_required) { warn qq/Index "$index_name" has status "recreate required", suggesting it should be recreated/; } diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index f4fe4355fa..323a3b0106 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -115,7 +115,7 @@ subtest 'get_elasticsearch_mappings() tests' => sub { subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' => sub { - plan tests => 30; + plan tests => 32; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); @@ -315,6 +315,8 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' ok(defined $docs->[0][1]->{marc_format}, 'First document marc_format field should be set'); + is($docs->[0][1]->{marc_format}, 'base64ISO2709', 'First document marc_format should be set correctly'); + is(scalar @{$docs->[0][1]->{type_of_record}}, 1, 'First document type_of_record field should have one value'); is_deeply( $docs->[0][1]->{type_of_record}, @@ -351,4 +353,27 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' ok(!(defined $docs->[0][1]->{unimarc_title}), "No mapping when marc_type doesn't match marc flavour"); + # Marc serialization format fallback for records exceeding ISO2709 max record size + + my $large_marc_record = MARC::Record->new(); + $large_marc_record->leader(' cam 22 a 4500'); + + $large_marc_record->append_fields( + MARC::Field->new('100', '', '', a => 'Author 1'), + MARC::Field->new('110', '', '', a => 'Corp Author'), + MARC::Field->new('210', '', '', a => 'Title 1'), + MARC::Field->new('245', '', '', a => 'Title:', b => 'large record'), + MARC::Field->new('999', '', '', c => '1234567'), + ); + + my $item_field = MARC::Field->new('952', '', '', o 
=> '123456789123456789123456789', p => '123456789', z => 'test'); + my $items_count = 1638; + while(--$items_count) { + $large_marc_record->append_fields($item_field); + } + + $docs = $see->marc_records_to_documents([$large_marc_record]); + + is($docs->[0][1]->{marc_format}, 'MARCXML', 'For record exceeding max record size marc_format should be set correctly'); + }; -- 2.39.5
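
For reference, here is a minimal standalone sketch (not part of the patch) of how the per-target options handled by _process_mappings above (substr, value_callbacks, property) transform a single piece of MARC field data into Elasticsearch document fields. The target names and sample control field data are illustrative only.

    #!/usr/bin/perl
    # Sketch: mimics the option handling in Koha::SearchEngine::Elasticsearch::_process_mappings.
    # Target names and sample data are made up for illustration.
    use strict;
    use warnings;
    use List::Util qw(reduce);
    use Data::Dumper;

    my $mappings = [
        # [ target_name, options ]
        [ 'local-number',        {} ],                      # copy value as-is
        [ 'date-of-publication', { substr => [ 7, 4 ] } ],  # 4 characters starting at offset 7
        [ 'title__suggestion',   { property => 'input' } ], # wrap value for a completion field
    ];

    my $data            = '140211b2014    xxu||||| |||| 00| 0 eng d'; # sample 008-like data
    my $record_document = {};

    foreach my $mapping (@{$mappings}) {
        my ($target, $options) = @{$mapping};
        my $_data = $data;                     # copy, each target may mutate its own value
        $record_document->{$target} //= [];
        if (defined $options->{substr}) {
            my ($start, $length) = @{ $options->{substr} };
            $_data = length($data) > $start ? substr $data, $start, $length : '';
        }
        if (defined $options->{value_callbacks}) {
            $_data = reduce { $b->($a) } ( $_data, @{ $options->{value_callbacks} } );
        }
        if (defined $options->{property}) {
            $_data = { $options->{property} => $_data };
        }
        push @{ $record_document->{$target} }, $_data;
    }

    print Dumper($record_document);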
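Similarly, a hedged sketch of how a caller might combine the index status helpers introduced in Indexer.pm (index_exists, is_index_status_*, update_mappings); it mirrors the logic in rebuild_elastic_search.pl above. The index name 'biblios' is an example only.

    # Sketch: using the renamed index status helpers from
    # Koha::SearchEngine::Elasticsearch::Indexer (mirrors do_reindex() in
    # rebuild_elastic_search.pl).
    use Koha::SearchEngine::Elasticsearch::Indexer;

    my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new({ index => 'biblios' });

    if (!$indexer->index_exists) {
        # No index yet: create it (create_index also pushes the mappings)
        $indexer->create_index();
    }
    elsif ($indexer->is_index_status_recreate_required) {
        # A failed mappings update flagged the index as unusable
        warn "Index must be dropped and recreated before reindexing";
    }
    elsif ($indexer->is_index_status_ok) {
        # Index is healthy: safe to push mapping changes from the database
        $indexer->update_mappings();
    }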