From d4783f244b9b8bfe786849c8bc5f46489999a7af Mon Sep 17 00:00:00 2001 From: Nick Clemens Date: Tue, 14 Jan 2020 13:00:03 +0000 Subject: [PATCH] Bug 24269: Adjust C4::Heading to generate headings from auth tags To test: 1 - Be using Elasticsearch 2 - Check that field 150 is mapped to 'Match-heading' or add it (the subfields don't matter) 3 - Add a topic term authority record like "150 $aCats$vFiction" 4 - Add a 650 with $aCats and $vFiction and $e depicted to a bibliographic record 5 - Run the linker for the bib perl misc/link_bibs_to_authorities.pl --bib-limit "biblionumber=89" -v 6 - Confirm the record is not correctly linked to the record 7 - Apply patch 8 - Reindex authorities for ES perl misc/search_tools/rebuild_elasticsearch.pl -v -d -a 9 - Run the linker and confirm record is correctly linked perl misc/link_bibs_to_authorities.pl --bib-limit "biblionumber=89" -v Signed-off-by: Martin Renvoize Signed-off-by: Bouzid Fergani Signed-off-by: Martin Renvoize --- C4/Biblio.pm | 4 +- C4/Heading.pm | 34 +++++------ C4/Heading/MARC21.pm | 69 +++++++++++++++++++--- C4/Heading/UNIMARC.pm | 8 +-- C4/Linker/Default.pm | 2 +- Koha/SearchEngine/Elasticsearch.pm | 16 ++++- t/Koha/SearchEngine/Elasticsearch.t | 92 ++++++++++++++++++++++++++++- t/db_dependent/Heading.t | 33 ++++++----- t/db_dependent/Heading_MARC21.t | 4 +- t/db_dependent/Linker_Default.t | 6 +- t/db_dependent/Linker_FirstMatch.t | 2 +- 11 files changed, 214 insertions(+), 56 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index da58af2e4a..f745b93d32 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -501,7 +501,7 @@ sub LinkBibHeadingsToAuthorities { $allowrelink = 1 unless defined $allowrelink; my $num_headings_changed = 0; foreach my $field ( $bib->fields() ) { - my $heading = C4::Heading->new_from_bib_field( $field, $frameworkcode ); + my $heading = C4::Heading->new_from_field( $field, $frameworkcode ); next unless defined $heading; # check existing $9 @@ -545,7 +545,7 @@ sub LinkBibHeadingsToAuthorities { my @auth_subfields; foreach my $subfield ( $field->subfields() ){ if ( $subfield->[0] =~ /[A-z]/ - && C4::Heading::valid_bib_heading_subfield( + && C4::Heading::valid_heading_subfield( $field->tag, $subfield->[0] ) ){ push @auth_subfields, $subfield->[0] => $subfield->[1]; diff --git a/C4/Heading.pm b/C4/Heading.pm index 5dfd0fadb0..c58b507fef 100644 --- a/C4/Heading.pm +++ b/C4/Heading.pm @@ -33,7 +33,7 @@ C4::Heading =head1 SYNOPSIS use C4::Heading; - my $heading = C4::Heading->new_from_bib_field($field, $frameworkcode); + my $heading = C4::Heading->new_from_field($field, $frameworkcode); my $thesaurus = $heading->thesaurus(); my $type = $heading->type(); my $display_heading = $heading->display_form(); @@ -46,32 +46,30 @@ headings found in bibliographic and authority records. =head1 METHODS -=head2 new_from_bib_field +=head2 new_from_field - my $heading = C4::Heading->new_from_bib_field($field, $frameworkcode, [, $marc_flavour]); + my $heading = C4::Heading->new_from_field($field, $frameworkcode, [, $auth]); Given a C object containing a heading from a bib record, create a C object. -The optional second parameter is the MARC flavour (i.e., MARC21 -or UNIMARC); if this parameter is not supplied, it is -taken from the Koha application context. +The optional third parameter is 'auth' - it is handled as boolean. If supplied we treat the field as an auth record field. Otherwise if it is a bib field. The fields checked are the same in a UNIMARC system and this parameter is ignored If the MARC field supplied is not a valid heading, undef is returned. =cut -sub new_from_bib_field { +sub new_from_field { my $class = shift; my $field = shift; - my $frameworkcode = shift; - my $marcflavour = @_ ? shift : C4::Context->preference('marcflavour'); - + my $frameworkcode = shift; #FIXME this is not used? + my $auth = shift; + my $marcflavour = C4::Context->preference('marcflavour'); my $marc_handler = _marc_format_handler($marcflavour); my $tag = $field->tag(); - return unless $marc_handler->valid_bib_heading_tag( $tag, $frameworkcode ); + return unless $marc_handler->valid_heading_tag( $tag, $frameworkcode, $auth ); my $self = {}; $self->{'field'} = $field; @@ -79,7 +77,7 @@ sub new_from_bib_field { $self->{'auth_type'}, $self->{'thesaurus'}, $self->{'search_form'}, $self->{'display_form'}, $self->{'match_type'} - ) = $marc_handler->parse_heading($field); + ) = $marc_handler->parse_heading($field, $auth ); bless $self, $class; return $self; @@ -170,20 +168,20 @@ sub preferred_authorities { return $results; } -=head2 valid_bib_heading_subfield +=head2 valid_heading_subfield - if (C4::Heading::valid_bib_heading_subfield('100', 'e', '')) ... + if (C4::Heading::valid_heading_subfield('100', 'e', '')) ... =cut -sub valid_bib_heading_subfield { +sub valid_heading_subfield { my $tag = shift; my $subfield = shift; - my $marcflavour = @_ ? shift : C4::Context->preference('marcflavour'); + my $marcflavour = C4::Context->preference('marcflavour'); + my $auth = shift; my $marc_handler = _marc_format_handler($marcflavour); - - return $marc_handler->valid_bib_heading_subfield( $tag, $subfield ); + return $marc_handler->valid_heading_subfield( $tag, $subfield, $auth ); } =head1 INTERNAL METHODS diff --git a/C4/Heading/MARC21.pm b/C4/Heading/MARC21.pm index 0fc3374470..5802914438 100644 --- a/C4/Heading/MARC21.pm +++ b/C4/Heading/MARC21.pm @@ -119,6 +119,49 @@ my $bib_heading_fields = { { auth_type => 'UNIF_TITLE', subfields => 'adfghklmnoprst', series => 1 }, }; +my $auth_heading_fields = { + '100' => { + auth_type => 'PERSO_NAME', + subfields => 'abcdfghjklmnopqrstvxyz', + main_entry => 1 + }, + '110' => { + auth_type => 'CORPO_NAME', + subfields => 'abcdfghklmnoprstvxyz', + main_entry => 1 + }, + '111' => { + auth_type => 'MEETI_NAME', + subfields => 'acdfghjklnpqstvxyz', + main_entry => 1 + }, + '130' => { + auth_type => 'UNIF_TITLE', + subfields => 'adfghklmnoprstvxyz', + main_entry => 1 + }, + '148' => { + auth_type => 'CHRON_TERM', + subfields => 'avxyz', + main_entry => 1 + }, + '150' => { + auth_type => 'TOPIC_TERM', + subfields => 'abgvxyz', + main_entry => 1 + }, + '151' => { + auth_type => 'GEOG_NAME', + subfields => 'agvxyz', + main_entry => 1 + }, + '155' => { + auth_type => 'GENRE/FORM', + subfields => 'agvxyz', + main_entry => 1 + } +}; + =head2 subdivisions =cut @@ -143,16 +186,18 @@ sub new { return bless {}, $class; } -=head2 valid_bib_heading_tag +=head2 valid_heading_tag =cut -sub valid_bib_heading_tag { +sub valid_heading_tag { my $self = shift; my $tag = shift; my $frameworkcode = shift; + my $auth = shift; + my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields }; - if ( exists $bib_heading_fields->{$tag} ) { + if ( exists $heading_fields->{$tag} ) { return 1; } else { @@ -161,32 +206,40 @@ sub valid_bib_heading_tag { } -=head2 valid_bib_heading_subfield +=head2 valid_heading_subfield =cut -sub valid_bib_heading_subfield { +sub valid_heading_subfield { my $self = shift; my $tag = shift; my $subfield = shift; + my $auth = shift; + + my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields }; - if ( exists $bib_heading_fields->{$tag} ) { - return 1 if ($bib_heading_fields->{$tag}->{subfields} =~ /$subfield/); + if ( exists $heading_fields->{$tag} ) { + return 1 if ($heading_fields->{$tag}->{subfields} =~ /$subfield/); } return 0; } =head2 parse_heading +Given a field and an indicator to specify if it is an authority field or biblio field we return +the correct type, thesauarus, search form, and display form of the heading. + =cut sub parse_heading { my $self = shift; my $field = shift; + my $auth = shift; my $tag = $field->tag; - my $field_info = $bib_heading_fields->{$tag}; + my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields }; + my $field_info = $heading_fields->{$tag}; my $auth_type = $field_info->{'auth_type'}; my $thesaurus = $tag =~ m/6../ diff --git a/C4/Heading/UNIMARC.pm b/C4/Heading/UNIMARC.pm index 81dafaef83..f1b5287708 100644 --- a/C4/Heading/UNIMARC.pm +++ b/C4/Heading/UNIMARC.pm @@ -87,20 +87,20 @@ sub new { return bless {}, $class; } -=head2 valid_bib_heading_tag +=head2 valid_heading_tag =cut -sub valid_bib_heading_tag { +sub valid_heading_tag { my ( $self, $tag ) = @_; return $bib_heading_fields->{$tag}; } -=head2 valid_bib_heading_subfield +=head2 valid_heading_subfield =cut -sub valid_bib_heading_subfield { +sub valid_heading_subfield { my $self = shift; my $tag = shift; my $subfield = shift; diff --git a/C4/Linker/Default.pm b/C4/Linker/Default.pm index 1fa395c19a..bfe1d72308 100644 --- a/C4/Linker/Default.pm +++ b/C4/Linker/Default.pm @@ -68,7 +68,7 @@ sub get_link { map { $_->[0] => $_->[1] } @subfields ); ( $authid, $fuzzy ) = - $self->get_link( C4::Heading->new_from_bib_field($field), + $self->get_link( C4::Heading->new_from_field($field), $behavior ); } } diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm index 7322f84178..7ba3e14f70 100644 --- a/Koha/SearchEngine/Elasticsearch.pm +++ b/Koha/SearchEngine/Elasticsearch.pm @@ -26,6 +26,7 @@ use Koha::Exceptions::Config; use Koha::Exceptions::Elasticsearch; use Koha::SearchFields; use Koha::SearchMarcMaps; +use C4::Heading; use Carp; use Clone qw(clone); @@ -470,7 +471,6 @@ sub marc_records_to_documents { } my $data_field_rules = $data_fields_rules->{$tag}; - if ($data_field_rules) { my $subfields_mappings = $data_field_rules->{subfields}; my $wildcard_mappings = $subfields_mappings->{'*'}; @@ -483,6 +483,13 @@ sub marc_records_to_documents { if (@{$mappings}) { $self->_process_mappings($mappings, $data, $record_document, $altscript); } + if ( defined @{$mappings}[0] && grep /match-heading/, @{@{$mappings}[0]} ){ + # Used by the authority linker the match-heading field requires a specific syntax + # that is specified in C4/Heading + my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading + next unless $heading; + push @{$record_document->{'match-heading'}}, $heading->search_form; + } } my $subfields_join_mappings = $data_field_rules->{subfields_join}; @@ -499,6 +506,13 @@ sub marc_records_to_documents { if ($data) { $self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript); } + if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){ + # Used by the authority linker the match-heading field requires a specific syntax + # that is specified in C4/Heading + my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading + next unless $heading; + push @{$record_document->{'match-heading'}}, $heading->search_form; + } } } } diff --git a/t/Koha/SearchEngine/Elasticsearch.t b/t/Koha/SearchEngine/Elasticsearch.t index e6f244d909..0d76dd17d9 100644 --- a/t/Koha/SearchEngine/Elasticsearch.t +++ b/t/Koha/SearchEngine/Elasticsearch.t @@ -17,7 +17,7 @@ use Modern::Perl; -use Test::More tests => 5; +use Test::More tests => 6; use Test::Exception; use t::lib::Mocks; @@ -26,6 +26,7 @@ use Test::MockModule; use MARC::Record; use Try::Tiny; +use List::Util qw( any ); use Koha::SearchEngine::Elasticsearch; use Koha::SearchEngine::Elasticsearch::Search; @@ -610,3 +611,92 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents_array () t ok($decoded_marc_record->isa('MARC::Record'), "ARRAY record successfully decoded from result"); is($decoded_marc_record->as_usmarc(), $marc_record_1->as_usmarc(), "Decoded ARRAY record has same data as original record"); }; + +subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authority tests' => sub { + + plan tests => 2; + + t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); + t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709'); + + my @mappings = ( + { + name => 'match-heading', + type => 'string', + facet => 0, + suggestible => 0, + searchable => 1, + sort => undef, + marc_type => 'marc21', + marc_field => '150', + }, + { + name => 'match-heading', + type => 'string', + facet => 0, + suggestible => 0, + searchable => 1, + sort => 0, + marc_type => 'marc21', + marc_field => '150a', + }, + { + name => 'match-heading', + type => 'string', + facet => 0, + suggestible => 0, + searchable => 1, + sort => 0, + marc_type => 'marc21', + marc_field => '150(ae)', + }, + ); + + my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch'); + $se->mock('_foreach_mapping', sub { + my ($self, $sub) = @_; + + foreach my $map (@mappings) { + $sub->( + $map->{name}, + $map->{type}, + $map->{facet}, + $map->{suggestible}, + $map->{sort}, + $map->{searchable}, + $map->{marc_type}, + $map->{marc_field} + ); + } + }); + + my $see = Koha::SearchEngine::Elasticsearch::Search->new({ index => $Koha::SearchEngine::Elasticsearch::AUTHORITIES_INDEX }); + + my $marc_record_1 = MARC::Record->new(); + $marc_record_1->append_fields( + MARC::Field->new('001', '123'), + MARC::Field->new('007', 'ku'), + MARC::Field->new('020', '', '', a => '1-56619-909-3'), + MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', x => 'Generalsubdiv', z => 'Geosubdiv'), + ); + my $marc_record_2 = MARC::Record->new(); + $marc_record_2->append_fields( + MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', z => 'Geosubdiv', x => 'Generalsubdiv', e => 'wrongsubdiv' ), + ); + my $records = [$marc_record_1, $marc_record_2]; + + $see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first + + my $docs = $see->marc_records_to_documents($records); + + ok( + any { $_ eq "Subject formsubdiv Genresubdiv generalsubdiv Generalsubdiv geographicsubdiv Geosubdiv" } + @{$docs->[0]->{'match-heading'}}, + "First record match-heading should contain the correctly formatted heading" + ); + ok( + any { $_ eq "Subject formsubdiv Genresubdiv geographicsubdiv Geosubdiv generalsubdiv Generalsubdiv" } + @{$docs->[1]->{'match-heading'}}, + "Second record match-heading should contain the correctly formatted heading without wrong subfield" + ); +}; diff --git a/t/db_dependent/Heading.t b/t/db_dependent/Heading.t index bcbbe515ec..e2f7064667 100755 --- a/t/db_dependent/Heading.t +++ b/t/db_dependent/Heading.t @@ -27,20 +27,23 @@ BEGIN { } subtest "MARC21 tests" => sub { - plan tests => 7; + plan tests => 9; t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); - ok(C4::Heading::valid_bib_heading_subfield('100', 'a'), '100a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('100', 'e'), '100e not valid'); + ok(C4::Heading::valid_heading_subfield('100', 'a'), '100a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('100', 'e'), '100e not valid for bib'); + + ok(C4::Heading::valid_heading_subfield('100', 'a', 1), '100a valid for authority'); + ok(!C4::Heading::valid_heading_subfield('100', 'e', 1), '100e not valid for authority'); - ok(C4::Heading::valid_bib_heading_subfield('110', 'a'), '110a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('110', 'e'), '110e not valid'); + ok(C4::Heading::valid_heading_subfield('110', 'a'), '110a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('110', 'e'), '110e not valid for bib'); - ok(C4::Heading::valid_bib_heading_subfield('600', 'a'), '600a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('600', 'e'), '600e not valid'); + ok(C4::Heading::valid_heading_subfield('600', 'a'), '600a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('600', 'e'), '600e not valid for bib'); - ok(!C4::Heading::valid_bib_heading_subfield('012', 'a'), '012a invalid field'); + ok(!C4::Heading::valid_heading_subfield('012', 'a'), '012a invalid field for bib'); }; subtest "UNIMARC tests" => sub { @@ -48,14 +51,14 @@ subtest "UNIMARC tests" => sub { t::lib::Mocks::mock_preference('marcflavour', 'UNIMARC'); - ok(C4::Heading::valid_bib_heading_subfield('100', 'a'), '100a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('100', 'i'), '100i not valid'); + ok(C4::Heading::valid_heading_subfield('100', 'a'), '100a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('100', 'i'), '100i not valid fir bib'); - ok(C4::Heading::valid_bib_heading_subfield('110', 'a'), '110a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('110', 'i'), '110i not valid'); + ok(C4::Heading::valid_heading_subfield('110', 'a'), '110a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('110', 'i'), '110i not valid for bib'); - ok(C4::Heading::valid_bib_heading_subfield('600', 'a'), '600a valid'); - ok(!C4::Heading::valid_bib_heading_subfield('600', 'i'), '600i not valid'); + ok(C4::Heading::valid_heading_subfield('600', 'a'), '600a valid for bib'); + ok(!C4::Heading::valid_heading_subfield('600', 'i'), '600i not valid for bib'); - ok(!C4::Heading::valid_bib_heading_subfield('012', 'a'), '012a invalid field'); + ok(!C4::Heading::valid_heading_subfield('012', 'a'), '012a invalid field for bib'); } diff --git a/t/db_dependent/Heading_MARC21.t b/t/db_dependent/Heading_MARC21.t index ba8e2e3e35..a2ea2df0d5 100755 --- a/t/db_dependent/Heading_MARC21.t +++ b/t/db_dependent/Heading_MARC21.t @@ -16,12 +16,12 @@ BEGIN { SKIP: { skip "MARC21 heading tests not applicable to UNIMARC", 2 if C4::Context->preference('marcflavour') eq 'UNIMARC'; my $field = MARC::Field->new( '650', ' ', '0', a => 'Uncles', x => 'Fiction' ); - my $heading = C4::Heading->new_from_bib_field($field); + my $heading = C4::Heading->new_from_field($field); is($heading->display_form(), 'Uncles--Fiction', 'Display form generation'); is($heading->search_form(), 'Uncles generalsubdiv Fiction', 'Search form generation'); $field = MARC::Field->new( '830', ' ', '4', a => 'The dark is rising ;', v => '3' ); - $heading = C4::Heading->new_from_bib_field($field); + $heading = C4::Heading->new_from_field($field); is($heading->display_form(), 'The dark is rising ;', 'Display form generation'); is($heading->search_form(), 'The dark is rising', 'Search form generation'); diff --git a/t/db_dependent/Linker_Default.t b/t/db_dependent/Linker_Default.t index 597e86b39e..19f421373f 100755 --- a/t/db_dependent/Linker_Default.t +++ b/t/db_dependent/Linker_Default.t @@ -50,9 +50,9 @@ subtest 'Test caching in get_link and update_cache' => sub { my $subject_field2 = MARC::Field->new($tags[0],0,2,$tags[1]=>'Science fiction'); my $genre_field = MARC::Field->new($tags[2],0,2,$tags[3]=>'Science fiction'); # Can we build a heading from it? - my $subject_heading = C4::Heading->new_from_bib_field( $subject_field, q{} ); - my $subject_heading2 = C4::Heading->new_from_bib_field( $subject_field, q{} ); - my $genre_heading = C4::Heading->new_from_bib_field( $genre_field, q{} ); + my $subject_heading = C4::Heading->new_from_field( $subject_field, q{} ); + my $subject_heading2 = C4::Heading->new_from_field( $subject_field, q{} ); + my $genre_heading = C4::Heading->new_from_field( $genre_field, q{} ); # Now test to see if C4::Linker can find it. diff --git a/t/db_dependent/Linker_FirstMatch.t b/t/db_dependent/Linker_FirstMatch.t index 04cc6a664b..cfcd752437 100755 --- a/t/db_dependent/Linker_FirstMatch.t +++ b/t/db_dependent/Linker_FirstMatch.t @@ -110,7 +110,7 @@ sub run_tests { my $heading; ok( defined( - $heading = C4::Heading->new_from_bib_field( $new_bibfield, q{} ) + $heading = C4::Heading->new_from_field( $new_bibfield, q{} ) ), 'Creating heading from bib field' ); -- 2.39.5