From 5597b666e468af8708c82d2c0d06a6a133e82677 Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Tue, 28 Apr 2020 12:19:56 +0000 Subject: [PATCH] Bug 25273: Make match-heading rely on authority type configuration The match-heading field is a special field used only by the linker, not accessible to staff or patrons via the interface. This field is used to store the constructed 'search form' used for matching bib headings to authority fields. In bug 24269 I attempted to use the mappings defined in the inferface and also inject the search term. This did not work as too many subfields were indexed on their own and leading to false matches. In this bug we remove the mappings for this field, and create it ourselves during the indexing process. The C4::Headings module is still used to generate the correct form, however, the mappings are set based on the authority types in the system. This gives the user the ability to add new typoes, but prevents mapping changes from breaking linker functionality To test: 1 - Start form a sample database with ElasticSearch working 2 - Download via Z39.50 2 authorities, one of which is a narrower heading of the other, e.g.: Waterworks Waterworks - Costs 3 - Place a heading for the broader term in a record. e.g. Waterworks In 650$a, without the cataloguing authority plugin. We don't want the link created now. You need syspref BiblioAddsAuthorities => allow 4 - Make sure linker is set to default 5 - Attempt to link the records misc/link_bibs_to_authorities.pl 6 - Linking fails 7 - Apply patch 8 - refresh index settings (if using a custom file, remove 'match-heading') You can reset mappings in the UI or run this: misc/search_tools/rebuild_elasticsearch.pl -v -d -r 9 - Reindex ES 10 - Try to link again 11 - It succeeds! 12 - Run the tests prove t/db_dependent/Koha/SearchEngine/Elasticsearch.t Signed-off-by: Victor Grousset/tuxayo Signed-off-by: Katrin Fischer Bug 25273: (follow-up) Signed-off-by: Katrin Fischer Signed-off-by: Jonathan Druart (cherry picked from commit ce161fda9b7d47e3cfcbc73ddb877eed627d6313) Signed-off-by: Lucas Gass --- C4/Biblio.pm | 5 +- Koha/SearchEngine/Elasticsearch.pm | 31 ++++---- .../Elasticsearch/QueryBuilder.pm | 2 +- .../searchengine/elasticsearch/mappings.yaml | 24 ------- .../Koha/SearchEngine/Elasticsearch.t | 70 +++++-------------- 5 files changed, 35 insertions(+), 97 deletions(-) rename t/{ => db_dependent}/Koha/SearchEngine/Elasticsearch.t (94%) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 895197d55a..0f0e9e62b2 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -2497,8 +2497,8 @@ $op is specialUpdate or recordDelete, and is used to know what we want to do $server is the server that we want to update -$record is the update MARC record if it's available. If it's not supplied -and is needed, it'll be loaded from the database. +$record is the updated MARC record. If it's not supplied +and is needed it will be loaded from the database. =cut @@ -2523,7 +2523,6 @@ sub ModZebra { biblionumber => $biblionumber, embed_items => 1 }); } - my $records = [$record]; $indexer->update_index_background( [$biblionumber], [$record] ); } elsif ( $op eq 'recordDelete' ) { diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index b518405cf5..f00d0bb48d 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -28,6 +28,7 @@ use Koha::SearchFields; use Koha::SearchMarcMaps; use Koha::Caches; use C4::Heading; +use C4::AuthoritiesMarc; use Carp; use Clone qw(clone); @@ -229,10 +230,10 @@ sub get_elasticsearch_mappings { } } ); + $mappings->{data}{properties}{ 'match-heading' } = _get_elasticsearch_field_config('search', 'text') if $self->index eq 'authorities'; $all_mappings{$self->index} = $mappings; } $self->sort_fields(\%{$sort_fields{$self->index}}); - return $all_mappings{$self->index}; } @@ -522,8 +523,22 @@ sub marc_records_to_documents { my @record_documents; + my %auth_match_headings; + if( $self->index eq 'authorities' ){ + my @auth_types = Koha::Authority::Types->search(); + %auth_match_headings = map { $_->authtypecode => $_->auth_tag_to_report } @auth_types; + } + foreach my $record (@{$records}) { my $record_document = {}; + + if ( $self->index eq 'authorities' ){ + my $authtypecode = GuessAuthTypeCode( $record ); + my $field = $record->field( $auth_match_headings{ $authtypecode } ); + my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading + push @{$record_document->{'match-heading'}}, $heading->search_form if $heading; + } + my $mappings = $rules->{leader}; if ($mappings) { $self->_process_mappings($mappings, $record->leader(), $record_document, { @@ -575,13 +590,6 @@ sub marc_records_to_documents { } ); } - if ( @{$mappings} && grep { $_->[0] eq 'match-heading'} @{$mappings} ){ - # Used by the authority linker the match-heading field requires a specific syntax - # that is specified in C4/Heading - my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading - next unless $heading; - push @{$record_document->{'match-heading'}}, $heading->search_form; - } } my $subfields_join_mappings = $data_field_rules->{subfields_join}; @@ -604,13 +612,6 @@ sub marc_records_to_documents { } ); } - if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){ - # Used by the authority linker the match-heading field requires a specific syntax - # that is specified in C4/Heading - my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading - next unless $heading; - push @{$record_document->{'match-heading'}}, $heading->search_form; - } } } } diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm index 8e2adf8136..d49de32eea 100644 --- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm +++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm @@ -476,7 +476,7 @@ sub build_authorities_query_compat { $m = exists $koha_to_index_name->{$m} ? $koha_to_index_name->{$m} : $m; push @indexes, $m; - warn "Unknown search field $m in marclist" unless (defined $mappings->{data}->{properties}->{$m} || $m eq ''); + warn "Unknown search field $m in marclist" unless (defined $mappings->{data}->{properties}->{$m} || $m eq '' || $m eq 'match-heading'); } for ( my $i = 0 ; $i < @$value ; $i++ ) { next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches diff --git a/admin/searchengine/elasticsearch/mappings.yaml b/admin/searchengine/elasticsearch/mappings.yaml index b72fe18be4..847647e0fd 100644 --- a/admin/searchengine/elasticsearch/mappings.yaml +++ b/admin/searchengine/elasticsearch/mappings.yaml @@ -695,30 +695,6 @@ authorities: sort: ~ suggestible: '' type: '' - Match-heading: - label: Match-heading - mappings: - - facet: '' - marc_field: 100(abcdefghjklmnopqrstvxyz) - marc_type: marc21 - sort: ~ - suggestible: '' - - facet: '' - marc_field: 111(acdefghjklnpqstvxyz) - marc_type: marc21 - sort: ~ - suggestible: '' - - facet: '' - marc_field: 100(abcdefghjklmnopqrstvxyz) - marc_type: normarc - sort: ~ - suggestible: '' - - facet: '' - marc_field: 111(acdefghjklnpqstvxyz) - marc_type: normarc - sort: ~ - suggestible: '' - type: '' Match-heading-see-from: label: Match-heading-see-from mappings: diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/db_dependent/Koha/SearchEngine/Elasticsearch.t similarity index 94% rename from t/Koha/SearchEngine/Elasticsearch.t rename to t/db_dependent/Koha/SearchEngine/Elasticsearch.t index c4bcc1bc6a..12c3599885 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/db_dependent/Koha/SearchEngine/Elasticsearch.t @@ -21,6 +21,7 @@ use Test::More tests => 6; use Test::Exception; use t::lib::Mocks; +use t::lib::TestBuilder; use Test::MockModule; @@ -31,6 +32,9 @@ use List::Util qw( any ); use Koha::SearchEngine::Elasticsearch; use Koha::SearchEngine::Elasticsearch::Search; +my $schema = Koha::Database->new->schema; +$schema->storage->txn_begin; + subtest '_read_configuration() tests' => sub { plan tests => 13; @@ -346,7 +350,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests' MARC::Field->new('999', '', '', c => '1234568'), MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric', o => $long_callno), ); - my $records = [$marc_record_1, $marc_record_2]; + my $records = [ $marc_record_1, $marc_record_2 ]; $see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first @@ -626,7 +630,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents_array () t MARC::Field->new('999', '', '', c => '1234568'), MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric'), ); - my $records = [$marc_record_1, $marc_record_2]; + my $records = [ $marc_record_1, $marc_record_2 ]; $see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first @@ -652,6 +656,12 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709'); + my $builder = t::lib::TestBuilder->new; + my $auth_type = $builder->build_object({ class => 'Koha::Authority::Types', value =>{ + auth_tag_to_report => '150' + } + }); + my @mappings = ( { name => 'match', @@ -662,57 +672,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori sort => 0, marc_type => 'marc21', marc_field => '150(ae)', - }, - { - name => 'heading', - type => 'string', - facet => 0, - suggestible => 0, - searchable => 1, - sort => 0, - marc_type => 'marc21', - marc_field => '150a', - }, - { - name => 'heading', - type => 'string', - facet => 0, - suggestible => 0, - searchable => 1, - sort => 0, - marc_type => 'marc21', - marc_field => '150(ae)', - }, - { - name => 'heading-main', - type => 'string', - facet => 0, - suggestible => 0, - searchable => 1, - sort => 0, - marc_type => 'marc21', - marc_field => '150a', - }, - { - name => 'heading', - type => 'string', - facet => 0, - suggestible => 0, - searchable => 1, - sort => 0, - marc_type => 'marc21', - marc_field => '150', - }, - { - name => 'match-heading', - type => 'string', - facet => 0, - suggestible => 0, - searchable => 1, - sort => 0, - marc_type => 'marc21', - marc_field => '150', - }, + } ); my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch'); @@ -745,7 +705,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori $marc_record_2->append_fields( MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', z => 'Geosubdiv', x => 'Generalsubdiv', e => 'wrongsubdiv' ), ); - my $records = [$marc_record_1, $marc_record_2]; + my $records = [ $marc_record_1, $marc_record_2 ]; $see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first @@ -762,3 +722,5 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori "Second record match-heading should contain the correctly formatted heading without wrong subfield" ); }; + +$schema->storage->txn_rollback; -- 2.39.5