瀏覽代碼

Bug 25273: Make match-heading rely on authority type configuration

The match-heading field is a special field used only by the linker, not accessible
to staff or patrons via the interface. This field is used to store the constructed
'search form' used for matching bib headings to authority fields.

In bug 24269 I attempted to use the mappings defined in the inferface and also inject the search term.
This did not work as too many subfields were indexed on their own and leading to false matches.
In this bug we remove the mappings for this field, and create it ourselves during
the indexing process. The C4::Headings module is still used to generate the correct form,
however, the mappings are set based on the authority types in the system. This gives the user
the ability to add new typoes, but prevents mapping changes from breaking linker functionality

To test:
 1 - Start form a sample database with ElasticSearch working
 2 - Download via Z39.50 2 authorities, one of which is a narrower heading of the other, e.g.:
    Waterworks
    Waterworks - Costs
 3 - Place a heading for the broader term in a record. e.g. Waterworks
       In 650$a, without the cataloguing authority plugin. We don't want
       the link created now.
       You need syspref BiblioAddsAuthorities => allow
 4 - Make sure linker is set to default
 5 - Attempt to link the records
       misc/link_bibs_to_authorities.pl
 6 - Linking fails
 7 - Apply patch
 8 - refresh index settings (if using a custom file, remove 'match-heading')
       You can reset mappings in the UI or run this:
       misc/search_tools/rebuild_elasticsearch.pl -v -d -r
 9 - Reindex ES
10 - Try to link again
11 - It succeeds!
12 - Run the tests
     prove t/db_dependent/Koha/SearchEngine/Elasticsearch.t

Signed-off-by: Victor Grousset/tuxayo <victor@tuxayo.net>

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Bug 25273: (follow-up)

Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de>

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
20.11.x
Nick Clemens 4 年前
提交者 Jonathan Druart
父節點
當前提交
ce161fda9b
  1. 5
      C4/Biblio.pm
  2. 31
      Koha/SearchEngine/Elasticsearch.pm
  3. 2
      Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
  4. 24
      admin/searchengine/elasticsearch/mappings.yaml
  5. 70
      t/db_dependent/Koha/SearchEngine/Elasticsearch.t

5
C4/Biblio.pm

@ -2497,8 +2497,8 @@ $op is specialUpdate or recordDelete, and is used to know what we want to do
$server is the server that we want to update
$record is the update MARC record if it's available. If it's not supplied
and is needed, it'll be loaded from the database.
$record is the updated MARC record. If it's not supplied
and is needed it will be loaded from the database.
=cut
@ -2523,7 +2523,6 @@ sub ModZebra {
biblionumber => $biblionumber,
embed_items => 1 });
}
my $records = [$record];
$indexer->update_index_background( [$biblionumber], [$record] );
}
elsif ( $op eq 'recordDelete' ) {

31
Koha/SearchEngine/Elasticsearch.pm

@ -28,6 +28,7 @@ use Koha::SearchFields;
use Koha::SearchMarcMaps;
use Koha::Caches;
use C4::Heading;
use C4::AuthoritiesMarc;
use Carp;
use Clone qw(clone);
@ -234,10 +235,10 @@ sub get_elasticsearch_mappings {
}
}
);
$mappings->{data}{properties}{ 'match-heading' } = _get_elasticsearch_field_config('search', 'text') if $self->index eq 'authorities';
$all_mappings{$self->index} = $mappings;
}
$self->sort_fields(\%{$sort_fields{$self->index}});
return $all_mappings{$self->index};
}
@ -527,8 +528,22 @@ sub marc_records_to_documents {
my @record_documents;
my %auth_match_headings;
if( $self->index eq 'authorities' ){
my @auth_types = Koha::Authority::Types->search();
%auth_match_headings = map { $_->authtypecode => $_->auth_tag_to_report } @auth_types;
}
foreach my $record (@{$records}) {
my $record_document = {};
if ( $self->index eq 'authorities' ){
my $authtypecode = GuessAuthTypeCode( $record );
my $field = $record->field( $auth_match_headings{ $authtypecode } );
my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
push @{$record_document->{'match-heading'}}, $heading->search_form if $heading;
}
my $mappings = $rules->{leader};
if ($mappings) {
$self->_process_mappings($mappings, $record->leader(), $record_document, {
@ -580,13 +595,6 @@ sub marc_records_to_documents {
}
);
}
if ( @{$mappings} && grep { $_->[0] eq 'match-heading'} @{$mappings} ){
# Used by the authority linker the match-heading field requires a specific syntax
# that is specified in C4/Heading
my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
next unless $heading;
push @{$record_document->{'match-heading'}}, $heading->search_form;
}
}
my $subfields_join_mappings = $data_field_rules->{subfields_join};
@ -609,13 +617,6 @@ sub marc_records_to_documents {
}
);
}
if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){
# Used by the authority linker the match-heading field requires a specific syntax
# that is specified in C4/Heading
my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
next unless $heading;
push @{$record_document->{'match-heading'}}, $heading->search_form;
}
}
}
}

2
Koha/SearchEngine/Elasticsearch/QueryBuilder.pm

@ -476,7 +476,7 @@ sub build_authorities_query_compat {
$m = exists $koha_to_index_name->{$m} ? $koha_to_index_name->{$m} : $m;
push @indexes, $m;
warn "Unknown search field $m in marclist" unless (defined $mappings->{data}->{properties}->{$m} || $m eq '');
warn "Unknown search field $m in marclist" unless (defined $mappings->{data}->{properties}->{$m} || $m eq '' || $m eq 'match-heading');
}
for ( my $i = 0 ; $i < @$value ; $i++ ) {
next unless $value->[$i]; #clean empty form values, ES doesn't like undefined searches

24
admin/searchengine/elasticsearch/mappings.yaml

@ -695,30 +695,6 @@ authorities:
sort: ~
suggestible: ''
type: ''
Match-heading:
label: Match-heading
mappings:
- facet: ''
marc_field: 100(abcdefghjklmnopqrstvxyz)
marc_type: marc21
sort: ~
suggestible: ''
- facet: ''
marc_field: 111(acdefghjklnpqstvxyz)
marc_type: marc21
sort: ~
suggestible: ''
- facet: ''
marc_field: 100(abcdefghjklmnopqrstvxyz)
marc_type: normarc
sort: ~
suggestible: ''
- facet: ''
marc_field: 111(acdefghjklnpqstvxyz)
marc_type: normarc
sort: ~
suggestible: ''
type: ''
Match-heading-see-from:
label: Match-heading-see-from
mappings:

70
t/Koha/SearchEngine/Elasticsearch.t → t/db_dependent/Koha/SearchEngine/Elasticsearch.t

@ -21,6 +21,7 @@ use Test::More tests => 6;
use Test::Exception;
use t::lib::Mocks;
use t::lib::TestBuilder;
use Test::MockModule;
@ -31,6 +32,9 @@ use List::Util qw( any );
use Koha::SearchEngine::Elasticsearch;
use Koha::SearchEngine::Elasticsearch::Search;
my $schema = Koha::Database->new->schema;
$schema->storage->txn_begin;
subtest '_read_configuration() tests' => sub {
plan tests => 10;
@ -336,7 +340,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () tests'
MARC::Field->new('999', '', '', c => '1234568'),
MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric', o => $long_callno),
);
my $records = [$marc_record_1, $marc_record_2];
my $records = [ $marc_record_1, $marc_record_2 ];
$see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first
@ -616,7 +620,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents_array () t
MARC::Field->new('999', '', '', c => '1234568'),
MARC::Field->new('952', '', '', 0 => 1, g => 'string where should be numeric'),
);
my $records = [$marc_record_1, $marc_record_2];
my $records = [ $marc_record_1, $marc_record_2 ];
$see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first
@ -642,6 +646,12 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709');
my $builder = t::lib::TestBuilder->new;
my $auth_type = $builder->build_object({ class => 'Koha::Authority::Types', value =>{
auth_tag_to_report => '150'
}
});
my @mappings = (
{
name => 'match',
@ -652,57 +662,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori
sort => 0,
marc_type => 'marc21',
marc_field => '150(ae)',
},
{
name => 'heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150a',
},
{
name => 'heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150(ae)',
},
{
name => 'heading-main',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150a',
},
{
name => 'heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150',
},
{
name => 'match-heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150',
},
}
);
my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch');
@ -735,7 +695,7 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori
$marc_record_2->append_fields(
MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', z => 'Geosubdiv', x => 'Generalsubdiv', e => 'wrongsubdiv' ),
);
my $records = [$marc_record_1, $marc_record_2];
my $records = [ $marc_record_1, $marc_record_2 ];
$see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first
@ -752,3 +712,5 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authori
"Second record match-heading should contain the correctly formatted heading without wrong subfield"
);
};
$schema->storage->txn_rollback;
載入中…
取消
儲存