Browse Source

Bug 24269: Adjust C4::Heading to generate headings from auth tags

To test:
1 - Be using Elasticsearch
2 - Check that field 150 is mapped to 'Match-heading' or add it (the subfields don't matter)
3 - Add a topic term authority record like "150 $aCats$vFiction"
4 - Add a 650 with $aCats and $vFiction and  $e depicted to a bibliographic record
5 - Run the linker for the bib
    perl misc/link_bibs_to_authorities.pl --bib-limit "biblionumber=89" -v
6 - Confirm the record is not correctly linked to the record
7 - Apply patch
8 - Reindex authorities for ES
    perl misc/search_tools/rebuild_elasticsearch.pl -v -d -a
9 - Run the linker and confirm record is correctly linked
    perl misc/link_bibs_to_authorities.pl --bib-limit "biblionumber=89" -v

Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Bouzid Fergani <bouzid.fergani@inlibro.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
20.05.x
Nick Clemens 2 years ago
committed by Martin Renvoize
parent
commit
d4783f244b
Signed by: martin.renvoize GPG Key ID: 422B469130441A0F
  1. 4
      C4/Biblio.pm
  2. 34
      C4/Heading.pm
  3. 69
      C4/Heading/MARC21.pm
  4. 8
      C4/Heading/UNIMARC.pm
  5. 2
      C4/Linker/Default.pm
  6. 16
      Koha/SearchEngine/Elasticsearch.pm
  7. 92
      t/Koha/SearchEngine/Elasticsearch.t
  8. 33
      t/db_dependent/Heading.t
  9. 4
      t/db_dependent/Heading_MARC21.t
  10. 6
      t/db_dependent/Linker_Default.t
  11. 2
      t/db_dependent/Linker_FirstMatch.t

4
C4/Biblio.pm

@ -501,7 +501,7 @@ sub LinkBibHeadingsToAuthorities {
$allowrelink = 1 unless defined $allowrelink;
my $num_headings_changed = 0;
foreach my $field ( $bib->fields() ) {
my $heading = C4::Heading->new_from_bib_field( $field, $frameworkcode );
my $heading = C4::Heading->new_from_field( $field, $frameworkcode );
next unless defined $heading;
# check existing $9
@ -545,7 +545,7 @@ sub LinkBibHeadingsToAuthorities {
my @auth_subfields;
foreach my $subfield ( $field->subfields() ){
if ( $subfield->[0] =~ /[A-z]/
&& C4::Heading::valid_bib_heading_subfield(
&& C4::Heading::valid_heading_subfield(
$field->tag, $subfield->[0] )
){
push @auth_subfields, $subfield->[0] => $subfield->[1];

34
C4/Heading.pm

@ -33,7 +33,7 @@ C4::Heading
=head1 SYNOPSIS
use C4::Heading;
my $heading = C4::Heading->new_from_bib_field($field, $frameworkcode);
my $heading = C4::Heading->new_from_field($field, $frameworkcode);
my $thesaurus = $heading->thesaurus();
my $type = $heading->type();
my $display_heading = $heading->display_form();
@ -46,32 +46,30 @@ headings found in bibliographic and authority records.
=head1 METHODS
=head2 new_from_bib_field
=head2 new_from_field
my $heading = C4::Heading->new_from_bib_field($field, $frameworkcode, [, $marc_flavour]);
my $heading = C4::Heading->new_from_field($field, $frameworkcode, [, $auth]);
Given a C<MARC::Field> object containing a heading from a
bib record, create a C<C4::Heading> object.
The optional second parameter is the MARC flavour (i.e., MARC21
or UNIMARC); if this parameter is not supplied, it is
taken from the Koha application context.
The optional third parameter is 'auth' - it is handled as boolean. If supplied we treat the field as an auth record field. Otherwise if it is a bib field. The fields checked are the same in a UNIMARC system and this parameter is ignored
If the MARC field supplied is not a valid heading, undef
is returned.
=cut
sub new_from_bib_field {
sub new_from_field {
my $class = shift;
my $field = shift;
my $frameworkcode = shift;
my $marcflavour = @_ ? shift : C4::Context->preference('marcflavour');
my $frameworkcode = shift; #FIXME this is not used?
my $auth = shift;
my $marcflavour = C4::Context->preference('marcflavour');
my $marc_handler = _marc_format_handler($marcflavour);
my $tag = $field->tag();
return unless $marc_handler->valid_bib_heading_tag( $tag, $frameworkcode );
return unless $marc_handler->valid_heading_tag( $tag, $frameworkcode, $auth );
my $self = {};
$self->{'field'} = $field;
@ -79,7 +77,7 @@ sub new_from_bib_field {
$self->{'auth_type'}, $self->{'thesaurus'},
$self->{'search_form'}, $self->{'display_form'},
$self->{'match_type'}
) = $marc_handler->parse_heading($field);
) = $marc_handler->parse_heading($field, $auth );
bless $self, $class;
return $self;
@ -170,20 +168,20 @@ sub preferred_authorities {
return $results;
}
=head2 valid_bib_heading_subfield
=head2 valid_heading_subfield
if (C4::Heading::valid_bib_heading_subfield('100', 'e', '')) ...
if (C4::Heading::valid_heading_subfield('100', 'e', '')) ...
=cut
sub valid_bib_heading_subfield {
sub valid_heading_subfield {
my $tag = shift;
my $subfield = shift;
my $marcflavour = @_ ? shift : C4::Context->preference('marcflavour');
my $marcflavour = C4::Context->preference('marcflavour');
my $auth = shift;
my $marc_handler = _marc_format_handler($marcflavour);
return $marc_handler->valid_bib_heading_subfield( $tag, $subfield );
return $marc_handler->valid_heading_subfield( $tag, $subfield, $auth );
}
=head1 INTERNAL METHODS

69
C4/Heading/MARC21.pm

@ -119,6 +119,49 @@ my $bib_heading_fields = {
{ auth_type => 'UNIF_TITLE', subfields => 'adfghklmnoprst', series => 1 },
};
my $auth_heading_fields = {
'100' => {
auth_type => 'PERSO_NAME',
subfields => 'abcdfghjklmnopqrstvxyz',
main_entry => 1
},
'110' => {
auth_type => 'CORPO_NAME',
subfields => 'abcdfghklmnoprstvxyz',
main_entry => 1
},
'111' => {
auth_type => 'MEETI_NAME',
subfields => 'acdfghjklnpqstvxyz',
main_entry => 1
},
'130' => {
auth_type => 'UNIF_TITLE',
subfields => 'adfghklmnoprstvxyz',
main_entry => 1
},
'148' => {
auth_type => 'CHRON_TERM',
subfields => 'avxyz',
main_entry => 1
},
'150' => {
auth_type => 'TOPIC_TERM',
subfields => 'abgvxyz',
main_entry => 1
},
'151' => {
auth_type => 'GEOG_NAME',
subfields => 'agvxyz',
main_entry => 1
},
'155' => {
auth_type => 'GENRE/FORM',
subfields => 'agvxyz',
main_entry => 1
}
};
=head2 subdivisions
=cut
@ -143,16 +186,18 @@ sub new {
return bless {}, $class;
}
=head2 valid_bib_heading_tag
=head2 valid_heading_tag
=cut
sub valid_bib_heading_tag {
sub valid_heading_tag {
my $self = shift;
my $tag = shift;
my $frameworkcode = shift;
my $auth = shift;
my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields };
if ( exists $bib_heading_fields->{$tag} ) {
if ( exists $heading_fields->{$tag} ) {
return 1;
}
else {
@ -161,32 +206,40 @@ sub valid_bib_heading_tag {
}
=head2 valid_bib_heading_subfield
=head2 valid_heading_subfield
=cut
sub valid_bib_heading_subfield {
sub valid_heading_subfield {
my $self = shift;
my $tag = shift;
my $subfield = shift;
my $auth = shift;
my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields };
if ( exists $bib_heading_fields->{$tag} ) {
return 1 if ($bib_heading_fields->{$tag}->{subfields} =~ /$subfield/);
if ( exists $heading_fields->{$tag} ) {
return 1 if ($heading_fields->{$tag}->{subfields} =~ /$subfield/);
}
return 0;
}
=head2 parse_heading
Given a field and an indicator to specify if it is an authority field or biblio field we return
the correct type, thesauarus, search form, and display form of the heading.
=cut
sub parse_heading {
my $self = shift;
my $field = shift;
my $auth = shift;
my $tag = $field->tag;
my $field_info = $bib_heading_fields->{$tag};
my $heading_fields = $auth ? { %$auth_heading_fields } : { %$bib_heading_fields };
my $field_info = $heading_fields->{$tag};
my $auth_type = $field_info->{'auth_type'};
my $thesaurus =
$tag =~ m/6../

8
C4/Heading/UNIMARC.pm

@ -87,20 +87,20 @@ sub new {
return bless {}, $class;
}
=head2 valid_bib_heading_tag
=head2 valid_heading_tag
=cut
sub valid_bib_heading_tag {
sub valid_heading_tag {
my ( $self, $tag ) = @_;
return $bib_heading_fields->{$tag};
}
=head2 valid_bib_heading_subfield
=head2 valid_heading_subfield
=cut
sub valid_bib_heading_subfield {
sub valid_heading_subfield {
my $self = shift;
my $tag = shift;
my $subfield = shift;

2
C4/Linker/Default.pm

@ -68,7 +68,7 @@ sub get_link {
map { $_->[0] => $_->[1] } @subfields
);
( $authid, $fuzzy ) =
$self->get_link( C4::Heading->new_from_bib_field($field),
$self->get_link( C4::Heading->new_from_field($field),
$behavior );
}
}

16
Koha/SearchEngine/Elasticsearch.pm

@ -26,6 +26,7 @@ use Koha::Exceptions::Config;
use Koha::Exceptions::Elasticsearch;
use Koha::SearchFields;
use Koha::SearchMarcMaps;
use C4::Heading;
use Carp;
use Clone qw(clone);
@ -470,7 +471,6 @@ sub marc_records_to_documents {
}
my $data_field_rules = $data_fields_rules->{$tag};
if ($data_field_rules) {
my $subfields_mappings = $data_field_rules->{subfields};
my $wildcard_mappings = $subfields_mappings->{'*'};
@ -483,6 +483,13 @@ sub marc_records_to_documents {
if (@{$mappings}) {
$self->_process_mappings($mappings, $data, $record_document, $altscript);
}
if ( defined @{$mappings}[0] && grep /match-heading/, @{@{$mappings}[0]} ){
# Used by the authority linker the match-heading field requires a specific syntax
# that is specified in C4/Heading
my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
next unless $heading;
push @{$record_document->{'match-heading'}}, $heading->search_form;
}
}
my $subfields_join_mappings = $data_field_rules->{subfields_join};
@ -499,6 +506,13 @@ sub marc_records_to_documents {
if ($data) {
$self->_process_mappings($subfields_join_mappings->{$subfields_group}, $data, $record_document, $altscript);
}
if ( grep { $_->[0] eq 'match-heading' } @{$subfields_join_mappings->{$subfields_group}} ){
# Used by the authority linker the match-heading field requires a specific syntax
# that is specified in C4/Heading
my $heading = C4::Heading->new_from_field( $field, undef, 1 ); #new auth heading
next unless $heading;
push @{$record_document->{'match-heading'}}, $heading->search_form;
}
}
}
}

92
t/Koha/SearchEngine/Elasticsearch.t

@ -17,7 +17,7 @@
use Modern::Perl;
use Test::More tests => 5;
use Test::More tests => 6;
use Test::Exception;
use t::lib::Mocks;
@ -26,6 +26,7 @@ use Test::MockModule;
use MARC::Record;
use Try::Tiny;
use List::Util qw( any );
use Koha::SearchEngine::Elasticsearch;
use Koha::SearchEngine::Elasticsearch::Search;
@ -610,3 +611,92 @@ subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents_array () t
ok($decoded_marc_record->isa('MARC::Record'), "ARRAY record successfully decoded from result");
is($decoded_marc_record->as_usmarc(), $marc_record_1->as_usmarc(), "Decoded ARRAY record has same data as original record");
};
subtest 'Koha::SearchEngine::Elasticsearch::marc_records_to_documents () authority tests' => sub {
plan tests => 2;
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
t::lib::Mocks::mock_preference('ElasticsearchMARCFormat', 'ISO2709');
my @mappings = (
{
name => 'match-heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => undef,
marc_type => 'marc21',
marc_field => '150',
},
{
name => 'match-heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150a',
},
{
name => 'match-heading',
type => 'string',
facet => 0,
suggestible => 0,
searchable => 1,
sort => 0,
marc_type => 'marc21',
marc_field => '150(ae)',
},
);
my $se = Test::MockModule->new('Koha::SearchEngine::Elasticsearch');
$se->mock('_foreach_mapping', sub {
my ($self, $sub) = @_;
foreach my $map (@mappings) {
$sub->(
$map->{name},
$map->{type},
$map->{facet},
$map->{suggestible},
$map->{sort},
$map->{searchable},
$map->{marc_type},
$map->{marc_field}
);
}
});
my $see = Koha::SearchEngine::Elasticsearch::Search->new({ index => $Koha::SearchEngine::Elasticsearch::AUTHORITIES_INDEX });
my $marc_record_1 = MARC::Record->new();
$marc_record_1->append_fields(
MARC::Field->new('001', '123'),
MARC::Field->new('007', 'ku'),
MARC::Field->new('020', '', '', a => '1-56619-909-3'),
MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', x => 'Generalsubdiv', z => 'Geosubdiv'),
);
my $marc_record_2 = MARC::Record->new();
$marc_record_2->append_fields(
MARC::Field->new('150', '', '', a => 'Subject', v => 'Genresubdiv', z => 'Geosubdiv', x => 'Generalsubdiv', e => 'wrongsubdiv' ),
);
my $records = [$marc_record_1, $marc_record_2];
$see->get_elasticsearch_mappings(); #sort_fields will call this and use the actual db values unless we call it first
my $docs = $see->marc_records_to_documents($records);
ok(
any { $_ eq "Subject formsubdiv Genresubdiv generalsubdiv Generalsubdiv geographicsubdiv Geosubdiv" }
@{$docs->[0]->{'match-heading'}},
"First record match-heading should contain the correctly formatted heading"
);
ok(
any { $_ eq "Subject formsubdiv Genresubdiv geographicsubdiv Geosubdiv generalsubdiv Generalsubdiv" }
@{$docs->[1]->{'match-heading'}},
"Second record match-heading should contain the correctly formatted heading without wrong subfield"
);
};

33
t/db_dependent/Heading.t

@ -27,20 +27,23 @@ BEGIN {
}
subtest "MARC21 tests" => sub {
plan tests => 7;
plan tests => 9;
t::lib::Mocks::mock_preference('marcflavour', 'MARC21');
ok(C4::Heading::valid_bib_heading_subfield('100', 'a'), '100a valid');
ok(!C4::Heading::valid_bib_heading_subfield('100', 'e'), '100e not valid');
ok(C4::Heading::valid_heading_subfield('100', 'a'), '100a valid for bib');
ok(!C4::Heading::valid_heading_subfield('100', 'e'), '100e not valid for bib');
ok(C4::Heading::valid_heading_subfield('100', 'a', 1), '100a valid for authority');
ok(!C4::Heading::valid_heading_subfield('100', 'e', 1), '100e not valid for authority');
ok(C4::Heading::valid_bib_heading_subfield('110', 'a'), '110a valid');
ok(!C4::Heading::valid_bib_heading_subfield('110', 'e'), '110e not valid');
ok(C4::Heading::valid_heading_subfield('110', 'a'), '110a valid for bib');
ok(!C4::Heading::valid_heading_subfield('110', 'e'), '110e not valid for bib');
ok(C4::Heading::valid_bib_heading_subfield('600', 'a'), '600a valid');
ok(!C4::Heading::valid_bib_heading_subfield('600', 'e'), '600e not valid');
ok(C4::Heading::valid_heading_subfield('600', 'a'), '600a valid for bib');
ok(!C4::Heading::valid_heading_subfield('600', 'e'), '600e not valid for bib');
ok(!C4::Heading::valid_bib_heading_subfield('012', 'a'), '012a invalid field');
ok(!C4::Heading::valid_heading_subfield('012', 'a'), '012a invalid field for bib');
};
subtest "UNIMARC tests" => sub {
@ -48,14 +51,14 @@ subtest "UNIMARC tests" => sub {
t::lib::Mocks::mock_preference('marcflavour', 'UNIMARC');
ok(C4::Heading::valid_bib_heading_subfield('100', 'a'), '100a valid');
ok(!C4::Heading::valid_bib_heading_subfield('100', 'i'), '100i not valid');
ok(C4::Heading::valid_heading_subfield('100', 'a'), '100a valid for bib');
ok(!C4::Heading::valid_heading_subfield('100', 'i'), '100i not valid fir bib');
ok(C4::Heading::valid_bib_heading_subfield('110', 'a'), '110a valid');
ok(!C4::Heading::valid_bib_heading_subfield('110', 'i'), '110i not valid');
ok(C4::Heading::valid_heading_subfield('110', 'a'), '110a valid for bib');
ok(!C4::Heading::valid_heading_subfield('110', 'i'), '110i not valid for bib');
ok(C4::Heading::valid_bib_heading_subfield('600', 'a'), '600a valid');
ok(!C4::Heading::valid_bib_heading_subfield('600', 'i'), '600i not valid');
ok(C4::Heading::valid_heading_subfield('600', 'a'), '600a valid for bib');
ok(!C4::Heading::valid_heading_subfield('600', 'i'), '600i not valid for bib');
ok(!C4::Heading::valid_bib_heading_subfield('012', 'a'), '012a invalid field');
ok(!C4::Heading::valid_heading_subfield('012', 'a'), '012a invalid field for bib');
}

4
t/db_dependent/Heading_MARC21.t

@ -16,12 +16,12 @@ BEGIN {
SKIP: {
skip "MARC21 heading tests not applicable to UNIMARC", 2 if C4::Context->preference('marcflavour') eq 'UNIMARC';
my $field = MARC::Field->new( '650', ' ', '0', a => 'Uncles', x => 'Fiction' );
my $heading = C4::Heading->new_from_bib_field($field);
my $heading = C4::Heading->new_from_field($field);
is($heading->display_form(), 'Uncles--Fiction', 'Display form generation');
is($heading->search_form(), 'Uncles generalsubdiv Fiction', 'Search form generation');
$field = MARC::Field->new( '830', ' ', '4', a => 'The dark is rising ;', v => '3' );
$heading = C4::Heading->new_from_bib_field($field);
$heading = C4::Heading->new_from_field($field);
is($heading->display_form(), 'The dark is rising ;', 'Display form generation');
is($heading->search_form(), 'The dark is rising', 'Search form generation');

6
t/db_dependent/Linker_Default.t

@ -50,9 +50,9 @@ subtest 'Test caching in get_link and update_cache' => sub {
my $subject_field2 = MARC::Field->new($tags[0],0,2,$tags[1]=>'Science fiction');
my $genre_field = MARC::Field->new($tags[2],0,2,$tags[3]=>'Science fiction');
# Can we build a heading from it?
my $subject_heading = C4::Heading->new_from_bib_field( $subject_field, q{} );
my $subject_heading2 = C4::Heading->new_from_bib_field( $subject_field, q{} );
my $genre_heading = C4::Heading->new_from_bib_field( $genre_field, q{} );
my $subject_heading = C4::Heading->new_from_field( $subject_field, q{} );
my $subject_heading2 = C4::Heading->new_from_field( $subject_field, q{} );
my $genre_heading = C4::Heading->new_from_field( $genre_field, q{} );
# Now test to see if C4::Linker can find it.

2
t/db_dependent/Linker_FirstMatch.t

@ -110,7 +110,7 @@ sub run_tests {
my $heading;
ok(
defined(
$heading = C4::Heading->new_from_bib_field( $new_bibfield, q{} )
$heading = C4::Heading->new_from_field( $new_bibfield, q{} )
),
'Creating heading from bib field'
);

Loading…
Cancel
Save