From 016bb29b60effec161e71ba34266acb27038afba Mon Sep 17 00:00:00 2001 From: Ere Maijala Date: Tue, 22 Jan 2019 14:38:32 +0200 Subject: [PATCH] Bug 21958: Fix bibliographic record field comparison with authority This makes the comparison between bibliographic field and authority field more robust and per subfield. This makes the comparison not consider the same e.g. the following fields: $a Test User $a Test $b User The actual issue cannot be as easily reproduced with the patches for bug 21826 applied, but here's a test plan anyway: 1. Make sure tests pass (especially t/db_dependent/AuthoritiesMarc*) 2. Make sure authority linking still works properly 3. Make sure authority and biblio frameworks allow subfield i 4. Make sure that even if you add subfield i to 700 in biblio, authority link is kept the same 5. Make sure that even if you add subfield i to the authority record, the authority link is kept the same Signed-off-by: Frank Hansen Signed-off-by: Martin Renvoize Signed-off-by: Jonathan Druart --- C4/AuthoritiesMarc.pm | 121 ++++++++++++----------- C4/Biblio.pm | 4 +- C4/Heading.pm | 2 + C4/Heading/MARC21.pm | 122 +++++++++++++++++++++--- t/db_dependent/AuthoritiesMarc.t | 28 +++++- t/db_dependent/AuthoritiesMarc/MARC21.t | 91 ++++++++++++++++++ t/db_dependent/Biblio.t | 3 + 7 files changed, 297 insertions(+), 74 deletions(-) create mode 100755 t/db_dependent/AuthoritiesMarc/MARC21.t diff --git a/C4/AuthoritiesMarc.pm b/C4/AuthoritiesMarc.pm index 2d80c417a2..1bb808bb9c 100644 --- a/C4/AuthoritiesMarc.pm +++ b/C4/AuthoritiesMarc.pm @@ -1,6 +1,7 @@ package C4::AuthoritiesMarc; # Copyright 2000-2002 Katipo Communications +# Copyright 2018 The National Library of Finland, University of Helsinki # # This file is part of Koha. # @@ -338,10 +339,12 @@ sub GuessAuthTypeCode { '110'=>{authtypecode=>'CORPO_NAME'}, '111'=>{authtypecode=>'MEETI_NAME'}, '130'=>{authtypecode=>'UNIF_TITLE'}, + '147'=>{authtypecode=>'NAME_EVENT'}, '148'=>{authtypecode=>'CHRON_TERM'}, '150'=>{authtypecode=>'TOPIC_TERM'}, '151'=>{authtypecode=>'GEOGR_NAME'}, '155'=>{authtypecode=>'GENRE/FORM'}, + '162'=>{authtypecode=>'MED_PERFRM'}, '180'=>{authtypecode=>'GEN_SUBDIV'}, '181'=>{authtypecode=>'GEO_SUBDIV'}, '182'=>{authtypecode=>'CHRON_SUBD'}, @@ -922,6 +925,8 @@ sub BuildSummary { # construct MARC21 summary # FIXME - looping over 1XX is questionable # since MARC21 authority should have only one 1XX + use C4::Heading::MARC21; + my $handler = C4::Heading::MARC21->new(); my $subfields_to_report; foreach my $field ($record->field('1..')) { my $tag = $field->tag(); @@ -929,31 +934,9 @@ sub BuildSummary { # FIXME - 152 is not a good tag to use # in MARC21 -- purely local tags really ought to be # 9XX - if ($tag eq '100') { - $subfields_to_report = 'abcdefghjklmnopqrstvxyz'; - } elsif ($tag eq '110') { - $subfields_to_report = 'abcdefghklmnoprstvxyz'; - } elsif ($tag eq '111') { - $subfields_to_report = 'acdefghklnpqstvxyz'; - } elsif ($tag eq '130') { - $subfields_to_report = 'adfghklmnoprstvxyz'; - } elsif ($tag eq '148') { - $subfields_to_report = 'abvxyz'; - } elsif ($tag eq '150') { - $subfields_to_report = 'abvxyz'; - } elsif ($tag eq '151') { - $subfields_to_report = 'avxyz'; - } elsif ($tag eq '155') { - $subfields_to_report = 'abvxyz'; - } elsif ($tag eq '180') { - $subfields_to_report = 'vxyz'; - } elsif ($tag eq '181') { - $subfields_to_report = 'vxyz'; - } elsif ($tag eq '182') { - $subfields_to_report = 'vxyz'; - } elsif ($tag eq '185') { - $subfields_to_report = 'vxyz'; - } + + $subfields_to_report = $handler->get_auth_heading_subfields_to_report($tag); + if ($subfields_to_report) { push @authorized, { heading => $field->as_string($subfields_to_report), @@ -1077,44 +1060,47 @@ sub GetAuthorizedHeading { return $field->as_string('abcdefghijlmnopqrstuvwxyz'); } } else { + use C4::Heading::MARC21; + my $handler = C4::Heading::MARC21->new(); + foreach my $field ($record->field('1..')) { - my $tag = $field->tag(); - next if "152" eq $tag; -# FIXME - 152 is not a good tag to use -# in MARC21 -- purely local tags really ought to be -# 9XX - if ($tag eq '100') { - return $field->as_string('abcdefghjklmnopqrstvxyz68'); - } elsif ($tag eq '110') { - return $field->as_string('abcdefghklmnoprstvxyz68'); - } elsif ($tag eq '111') { - return $field->as_string('acdefghklnpqstvxyz68'); - } elsif ($tag eq '130') { - return $field->as_string('adfghklmnoprstvxyz68'); - } elsif ($tag eq '148') { - return $field->as_string('abvxyz68'); - } elsif ($tag eq '150') { - return $field->as_string('abvxyz68'); - } elsif ($tag eq '151') { - return $field->as_string('avxyz68'); - } elsif ($tag eq '155') { - return $field->as_string('abvxyz68'); - } elsif ($tag eq '180') { - return $field->as_string('vxyz68'); - } elsif ($tag eq '181') { - return $field->as_string('vxyz68'); - } elsif ($tag eq '182') { - return $field->as_string('vxyz68'); - } elsif ($tag eq '185') { - return $field->as_string('vxyz68'); - } else { - return $field->as_string(); - } + my $subfields = $handler->get_valid_bib_heading_subfields($field->tag()); + return $field->as_string($subfields) if ($subfields); } } return; } +=head2 CompareFieldWithAuthority + + $match = &CompareFieldWithAuthority({ field => $field, authid => $authid }) + +Takes a MARC::Field from a bibliographic record and an authid, and returns true if they match. + +=cut + +sub CompareFieldWithAuthority { + my $args = shift; + + my $record = GetAuthority($args->{authid}); + return unless (ref $record eq 'MARC::Record'); + if (C4::Context->preference('marcflavour') eq 'UNIMARC') { + # UNIMARC has same subfields for bibs and authorities + foreach my $field ($record->field('2..')) { + return compare_fields($field, $args->{field}, 'abcdefghijlmnopqrstuvwxyz'); + } + } else { + use C4::Heading::MARC21; + my $handler = C4::Heading::MARC21->new(); + + foreach my $field ($record->field('1..')) { + my $subfields = $handler->get_valid_bib_heading_subfields($field->tag()); + return compare_fields($field, $args->{field}, $subfields) if ($subfields); + } + } + return 0; +} + =head2 BuildAuthHierarchies $text= &BuildAuthHierarchies( $authid, $force) @@ -1576,6 +1562,26 @@ sub get_auth_type_location { } } +=head2 compare_fields + + my match = compare_fields($field1, $field2, 'abcde'); + +Compares the listed subfields of both fields and return true if they all match + +=cut + +sub compare_fields { + my ($field1, $field2, $subfields) = @_; + + foreach my $subfield (split(//, $subfields)) { + my $subfield1 = $field1->subfield($subfield) // ''; + my $subfield2 = $field2->subfield($subfield) // ''; + return 0 unless $subfield1 eq $subfield2; + } + return 1; +} + + END { } # module clean-up code here (global destructor) 1; @@ -1586,6 +1592,7 @@ __END__ Koha Development Team Paul POULAIN paul.poulain@free.fr +Ere Maijala ere.maijala@helsinki.fi =cut diff --git a/C4/Biblio.pm b/C4/Biblio.pm index b482a99814..f6c89f92e8 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -714,9 +714,7 @@ sub _check_valid_auth_link { my ( $authid, $field ) = @_; require C4::AuthoritiesMarc; - my $authorized_heading = - C4::AuthoritiesMarc::GetAuthorizedHeading( { 'authid' => $authid } ) || ''; - return ($field->as_string('abcdefghijklmnopqrstuvwxyz') eq $authorized_heading); + return C4::AuthoritiesMarc::CompareFieldWithAuthority( { 'field' => $field, 'authid' => $authid } ); } =head2 GetBiblioData diff --git a/C4/Heading.pm b/C4/Heading.pm index c58b507fef..c65c127736 100644 --- a/C4/Heading.pm +++ b/C4/Heading.pm @@ -172,6 +172,8 @@ sub preferred_authorities { if (C4::Heading::valid_heading_subfield('100', 'e', '')) ... +Check if the given subfield is valid for the given field. + =cut sub valid_heading_subfield { diff --git a/C4/Heading/MARC21.pm b/C4/Heading/MARC21.pm index 5802914438..6a1a3346b2 100644 --- a/C4/Heading/MARC21.pm +++ b/C4/Heading/MARC21.pm @@ -68,6 +68,52 @@ my $bib_heading_fields = { subfields => 'adfghklmnoprst', main_entry => 1 }, + '147' => { + auth_type => 'NAME_EVENT', + subfields => 'acdgvxyz68', + main_entry => 1 + }, + '148' => { + auth_type => 'CHRON_TERM', + subfields => 'abvxyz68', + main_entry => 1 + }, + '150' => { + auth_type => 'TOPIC_TERM', + subfields => 'abvxyz68', + main_entry => 1 + }, + '151' => { + auth_type => 'GEOGR_NAME', + subfields => 'avxyz68', + main_entry => 1 + }, + '155' => { + auth_type => 'GENRE/FORM', + subfields => 'abvxyz68', + main_entry => 1 + }, + '162' => { + auth_type => 'MED_PERFRM', + subfields => 'a68', + main_entry => 1 + }, + '180' => { + auth_type => 'TOPIC_TERM', + subfields => 'vxyz68' + }, + '181' => { + auth_type => 'GEOGR_NAME', + subfields => 'vxyz68' + }, + '182' => { + auth_type => 'CHRON_TERM', + subfields => 'vxyz68' + }, + '185' => { + auth_type => 'GENRE/FORM', + subfields => 'vxyz68' + }, '440' => { auth_type => 'UNIF_TITLE', subfields => 'anp', series => 1 }, '600' => { auth_type => 'PERSO_NAME', @@ -122,44 +168,70 @@ my $bib_heading_fields = { my $auth_heading_fields = { '100' => { auth_type => 'PERSO_NAME', - subfields => 'abcdfghjklmnopqrstvxyz', + subfields => 'abcdefghjklmnopqrstvxyz68', main_entry => 1 }, '110' => { auth_type => 'CORPO_NAME', - subfields => 'abcdfghklmnoprstvxyz', + subfields => 'abcdefghklmnoprstvxyz68', main_entry => 1 }, '111' => { auth_type => 'MEETI_NAME', - subfields => 'acdfghjklnpqstvxyz', + subfields => 'acdefghklnpqstvxyz68', main_entry => 1 }, '130' => { auth_type => 'UNIF_TITLE', - subfields => 'adfghklmnoprstvxyz', + subfields => 'adfghklmnoprstvxyz68', + main_entry => 1 + }, + '147' => { + auth_type => 'NAME_EVENT', + subfields => 'acdgvxyz68', main_entry => 1 }, '148' => { - auth_type => 'CHRON_TERM', - subfields => 'avxyz', + auth_type => 'CHRON_TERM', + subfields => 'abvxyz68', main_entry => 1 }, '150' => { - auth_type => 'TOPIC_TERM', - subfields => 'abgvxyz', + auth_type => 'TOPIC_TERM', + subfields => 'abvxyz68', main_entry => 1 }, '151' => { - auth_type => 'GEOG_NAME', - subfields => 'agvxyz', + auth_type => 'GEOG_NAME', + subfields => 'avxyz68', main_entry => 1 }, '155' => { - auth_type => 'GENRE/FORM', - subfields => 'agvxyz', + auth_type => 'GENRE/FORM', + subfields => 'abvxyz68', main_entry => 1 - } + }, + '162' => { + auth_type => 'MED_PERFRM', + subfields => 'a68', + main_entry => 1 + }, + '180' => { + auth_type => 'TOPIC_TERM', + subfields => 'vxyz68', + }, + '181' => { + auth_type => 'GEOGR_NAME', + subfields => 'vxyz68', + }, + '182' => { + auth_type => 'CHRON_TERM', + subfields => 'vxyz68', + }, + '185' => { + auth_type => 'GENRE/FORM', + subfields => 'vxyz68', + }, }; =head2 subdivisions @@ -224,6 +296,30 @@ sub valid_heading_subfield { return 0; } +=head2 get_valid_bib_heading_subfields + +=cut + +sub get_valid_bib_heading_subfields { + my $self = shift; + my $tag = shift; + + return $bib_heading_fields->{$tag}->{subfields} // undef; +} + +=head2 get_auth_heading_subfields_to_report + +=cut + +sub get_auth_heading_subfields_to_report { + my $self = shift; + my $tag = shift; + + my $subfields = $auth_heading_fields->{$tag}->{subfields} // ''; + $subfields =~ s/[68]//; + return $subfields; +} + =head2 parse_heading Given a field and an indicator to specify if it is an authority field or biblio field we return diff --git a/t/db_dependent/AuthoritiesMarc.t b/t/db_dependent/AuthoritiesMarc.t index 50bbd6edb4..8e626f3f5c 100755 --- a/t/db_dependent/AuthoritiesMarc.t +++ b/t/db_dependent/AuthoritiesMarc.t @@ -5,9 +5,10 @@ use Modern::Perl; -use Test::More tests => 11; +use Test::More tests => 12; use Test::MockModule; use Test::Warn; +use MARC::Field; use MARC::Record; use t::lib::Mocks; @@ -54,6 +55,11 @@ $module->mock('GetAuthority', sub { [ '151', ' ', ' ', a => 'New York (City)' ], [ '551', ' ', ' ', a => 'New York (State)', w => 'g' ] ); + } elsif ($authid eq '5') { + $record->add_fields( + [ '001', '5' ], + [ '100', ' ', ' ', a => 'Lastname, Firstname', b => 'b', c => 'c', i => 'i' ] + ); } else { undef $record; } @@ -214,6 +220,26 @@ subtest 'AddAuthority should respect AUTO_INCREMENT (BZ 18104)' => sub { is( $record->field('001')->data, $id3, 'Check updated 001' ); }; +subtest 'CompareFieldWithAuthority tests' => sub { + plan tests => 3; + + t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); + + $builder->build({ source => 'AuthType', value => { authtypecode => 'PERSO_NAME' }}); + + my $field = MARC::Field->new('100', 0, 0, a => 'Lastname, Firstname', b => 'b', c => 'c'); + + ok(C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Authority matches'); + + $field->add_subfields(i => 'X'); + + ok(C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Compare ignores unlisted subfields'); + + $field->add_subfields(d => 'd'); + + ok(!C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Authority does not match'); +}; + $schema->storage->txn_rollback; $module->unmock('GetAuthority'); diff --git a/t/db_dependent/AuthoritiesMarc/MARC21.t b/t/db_dependent/AuthoritiesMarc/MARC21.t new file mode 100755 index 0000000000..77047e4bbc --- /dev/null +++ b/t/db_dependent/AuthoritiesMarc/MARC21.t @@ -0,0 +1,91 @@ +#!/usr/bin/perl +# + +use strict; +use warnings; + +use Test::MockModule; +use Test::More tests => 2; +use MARC::Field; +use MARC::Record; + +use t::lib::Mocks; +use t::lib::TestBuilder; + +BEGIN { + use_ok('C4::AuthoritiesMarc::MARC21'); +} + +my $schema = Koha::Database->new->schema; +$schema->storage->txn_begin; +my $dbh = C4::Context->dbh; +my $builder = t::lib::TestBuilder->new; + +t::lib::Mocks::mock_preference('marcflavour', 'MARC21'); + +subtest 'CompareFieldWithAuthority tests' => sub { + plan tests => 3; + + # We are now going to be testing the authorities hierarchy code, and + # therefore need to pretend that we have consistent data in our database + my $module = new Test::MockModule('C4::AuthoritiesMarc'); + $module->mock('GetHeaderAuthority', sub { + return {'authtrees' => ''}; + }); + $module->mock('AddAuthorityTrees', sub { + return; + }); + $module->mock('GetAuthority', sub { + my ($authid) = @_; + my $record = MARC::Record->new(); + if ($authid eq '1') { + $record->add_fields( + [ '001', '1' ], + [ '151', ' ', ' ', a => 'United States' ] + ); + } elsif ($authid eq '2') { + $record->add_fields( + [ '001', '2' ], + [ '151', ' ', ' ', a => 'New York (State)' ], + [ '551', ' ', ' ', a => 'United States', w => 'g', 9 => '1' ] + ); + } elsif ($authid eq '3') { + $record->add_fields( + [ '001', '3' ], + [ '151', ' ', ' ', a => 'New York (City)' ], + [ '551', ' ', ' ', a => 'New York (State)', w => 'g', 9 => '2' ] + ); + } elsif ($authid eq '4') { + $record->add_fields( + [ '001', '4' ], + [ '151', ' ', ' ', a => 'New York (City)' ], + [ '551', ' ', ' ', a => 'New York (State)', w => 'g' ] + ); + } elsif ($authid eq '5') { + $record->add_fields( + [ '001', '5' ], + [ '100', ' ', ' ', a => 'Lastname, Firstname', b => 'b', c => 'c', i => 'i' ] + ); + } else { + undef $record; + } + return $record; + }); + + $dbh->do('DELETE FROM auth_types'); + $builder->build({ source => 'AuthType', value => { authtypecode => 'PERSO_NAME' }}); + + my $field = MARC::Field->new('100', 0, 0, a => 'Lastname, Firstname', b => 'b', c => 'c'); + + ok(C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Authority matches'); + + $field->add_subfields(i => 'X'); + + ok(C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Compare ignores unlisted subfields'); + + $field->add_subfields(d => 'd'); + + ok(!C4::AuthoritiesMarc::CompareFieldWithAuthority({'field' => $field, 'authid' => 5}), 'Authority does not match'); +}; + +$schema->storage->txn_rollback; diff --git a/t/db_dependent/Biblio.t b/t/db_dependent/Biblio.t index 4e69b3e9b5..5d59de3de0 100755 --- a/t/db_dependent/Biblio.t +++ b/t/db_dependent/Biblio.t @@ -230,6 +230,9 @@ sub run_tests { my $marcflavour = shift; t::lib::Mocks::mock_preference('marcflavour', $marcflavour); + # Authority tests don't interact well with Elasticsearch at the moment due to the fact that there's currently no way to + # roll back ES index changes. + t::lib::Mocks::mock_preference('SearchEngine', 'Zebra'); my $isbn = '0590353403'; my $title = 'Foundation'; -- 2.39.5