From 4077a6faa98a8fe269c384eac8c7de31eedfc5ea Mon Sep 17 00:00:00 2001 From: Fridolin Somers Date: Thu, 19 Sep 2019 16:55:51 +0200 Subject: [PATCH] Bug 23630: Do not remove field 999 in Elasticsearch indexing Elasticsearch indexing uses 999$c to store record id by deleting the all field first ! So you can not store anything in field 999, even in UNIMARC and even in authorities records. Looks like it is quick fix code added to start Elasticsearch use. This behavior is disturbing and very strange for UNIMARC flavour. This patch corrects by defining record ids mandatory in Koha::SearchEngine::Elasticsearch::Indexer::update_index(). This ids array is actually always given (except in UT). I think it is useless to allow adding a record without its id. Test plan : 1) Use Elasticsearch as SearchEngine 2) Create a subfield 999$z in default framework 3) Create a record with default framework 4) Enter a random string (never used in catalog) like "tototata" in 999$z 5) In Search engine configuration, define search field "subject" for 999$z 6) Rebuild record : misc/search_tools/rebuild_elasticsearch.pl -b -bn -v 7) Search for the random string => You get a result 8) Optionnaly look at records in ES : :9200//data/ Signed-off-by: Nick Clemens Signed-off-by: Martin Renvoize (cherry picked from commit fb083813a8237691f0b9d419d826fdea58096cd4) Signed-off-by: Fridolin Somers --- Koha/SearchEngine/Elasticsearch.pm | 5 +-- Koha/SearchEngine/Elasticsearch/Indexer.pm | 39 ++----------------- .../Koha/SearchEngine/Elasticsearch/Indexer.t | 2 +- 3 files changed, 6 insertions(+), 40 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 4435015d81..3b2b4ee08a 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -385,7 +385,7 @@ sub _process_mappings { =head2 marc_records_to_documents($marc_records) - my @record_documents = $self->marc_records_to_documents($marc_records); + my $record_documents = $self->marc_records_to_documents($marc_records); Using mappings stored in database convert C<$marc_records> to Elasticsearch documents. @@ -548,8 +548,7 @@ sub marc_records_to_documents { else { $record_document->{'marc_format'} = 'base64ISO2709'; } - my $id = $record->subfield('999', 'c'); - push @record_documents, [$id, $record_document]; + push @record_documents, $record_document; } return \@record_documents; } diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm index 703f39f3ca..3610f0191d 100644 --- a/Koha/SearchEngine/Elasticsearch/Indexer.pm +++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm @@ -84,14 +84,6 @@ use constant { Converts C C<$records> to Elasticsearch documents and performs an update request for these records on the Elasticsearch index. -The values in the arrays must match up, and the 999$c value in the MARC record -will be rewritten using the values in C<$biblionums> to ensure they are correct. -If C<$biblionums> is C, this won't happen, so in that case you should make -sure that 999$c is correct. - -Note that this will modify the original record if C<$biblionums> is supplied. -If that's a problem, clone them first. - =over 4 =item C<$biblionums> @@ -110,17 +102,14 @@ Arrayref of Cs. sub update_index { my ($self, $biblionums, $records) = @_; - if ($biblionums) { - $self->_sanitise_records($biblionums, $records); - } - my $conf = $self->get_elasticsearch_params(); my $elasticsearch = $self->get_elasticsearch(); my $documents = $self->marc_records_to_documents($records); my @body; - foreach my $document_info (@{$documents}) { - my ($id, $document) = @{$document_info}; + for (my $i=0; $i < scalar @$biblionums; $i++) { + my $id = $biblionums->[$i]; + my $document = $documents->[$i]; push @body, { index => { _id => $id @@ -382,28 +371,6 @@ sub index_exists { ); } -sub _sanitise_records { - my ($self, $biblionums, $records) = @_; - - confess "Unequal number of values in \$biblionums and \$records." if (@$biblionums != @$records); - - my $c = @$biblionums; - for (my $i=0; $i<$c; $i++) { - my $bibnum = $biblionums->[$i]; - my $rec = $records->[$i]; - # I've seen things you people wouldn't believe. Attack ships on fire - # off the shoulder of Orion. I watched C-beams glitter in the dark near - # the Tannhauser gate. MARC records where 999$c doesn't match the - # biblionumber column. All those moments will be lost in time... like - # tears in rain... - if ( $rec ) { - $rec->delete_fields($rec->field('999')); - # Make sure biblionumber is a string. Elasticsearch would consider int and string different IDs. - $rec->append_fields(MARC::Field->new('999','','','c' => "" . $bibnum, 'd' => "" . $bibnum)); - } - } -} - 1; __END__ diff --git a/t/db_dependent/Koha/SearchEngine/Elasticsearch/Indexer.t b/t/db_dependent/Koha/SearchEngine/Elasticsearch/Indexer.t index 80bfe79956..675ddda65a 100644 --- a/t/db_dependent/Koha/SearchEngine/Elasticsearch/Indexer.t +++ b/t/db_dependent/Koha/SearchEngine/Elasticsearch/Indexer.t @@ -59,7 +59,7 @@ subtest 'create_index() tests' => sub { MARC::Field->new('245', '', '', 'a' => 'Title') ); my $records = [$marc_record]; - ok($indexer->update_index(undef, $records), 'Update Index'); + ok($indexer->update_index([1], $records), 'Update Index'); is( $indexer->drop_index(), -- 2.39.5