From 890fd6ff068f58da4c72194ebc14be2cf479040b Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Wed, 16 Sep 2020 11:31:54 +0000 Subject: [PATCH] Bug 23828: Elasticsearch - Preserve record order when combining subfield in mappings Currently if you combine subfields in the MARC mappings the subfields are indexed in the order listed in the mapping. i.e. 650(avxyz) in mapping and in record: 650 $aHeading $zGeosubdiv $vFormsubdiv is indexed as: Heading Formsubdiv Geosubdiv We should preserve the order and index as: Heading Geosubdiv Formsubdiv We can use a built-in function in MARC::Field to achieve this To test: 1 - It is easy to find examples of this using authorities 2 - Find or create a record with subfields order azv e.g. 150$aActresses$zUnited states$vBiography 3 - Add or have a second authority e.g. 150$aActresses$vPortraits 4 - Set an authorities mapping for 'Heading' to 150(abgvxyz) find at: Administration->Search engine configuration (Elasticsearch)->Authorities tab 5 - Index the records in Elasticsearch perl misc/search_tools/rebuild_elasticsearch.pl -a -ai 1691 -ai 1692 6 - View the first record in the ES index curl es:9200/koha_kohadev_authorities/data/1692?pretty 7 - Note 'Heading' field is ordered as in the mapping 8 - Search authorities for 'contains' "act" 9 - Note the records sort incorrectly 10 - Apply patches 11 - perl misc/search_tools/rebuild_elasticsearch.pl -a -ai 1692 12 - curl es:9200/koha_kohadev_authorities/data/1692?pretty 13 - Note the order is now preserved 14 - Search authorities for 'contains' "act" 15 - Note the records sort correctly Signed-off-by: Heather Hernandez Signed-off-by: Katrin Fischer JD amended patch: Fix FAIL spelling combind ==> combined Signed-off-by: Jonathan Druart --- Koha/SearchEngine/Elasticsearch.pm | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 07b3763082..e7a8964ba1 100644 --- 
a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -619,14 +619,9 @@ sub marc_records_to_documents { my $subfields_join_mappings = $data_field_rules->{subfields_join}; if ($subfields_join_mappings) { foreach my $subfields_group (keys %{$subfields_join_mappings}) { - # Map each subfield to values, remove empty values, join with space - my $data = join( - ' ', - grep( - $_, - map { join(' ', $field->subfield($_)) } split(//, $subfields_group) - ) - ); + my $data_field = $field->clone; #copy field to preserve for alt scripts + $data_field->delete_subfield(match => qr/^$/); #remove empty subfields, otherwise they are printed as a space + my $data = $data_field->as_string( $subfields_group ); #get values for subfields as a combined string, preserving record order if ($data) { $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, { altscript => $altscript, -- 2.39.5