Koha/C4/Heading/MARC21.pm
Nick Clemens 2cf4496dd6
Bug 24094: Strip trailing spaces and punctuation from authority headings
Both when searching for and creating new authorities we need to remove
punctuation that exists in the bibliographic record but does not belong in
the authority record.

For example, a series with a volume contains a semicolon in the bib record,
however, this should not be passed to the authority as the volume is not
included in the authority record.

To test:
 1 - Set AutoCreateAuthorities to 'generate'
 2 - Set BiblioAddsAuthorities to 'true'
 3 - Set CatalogModuleRelink to 'Do'
 4 - Find or create a record with:
    a 100 field with a subfield e preceded by a comma: 100 $aBoring, M. Eugene M ,$e author
    an 830 field with a volume preceded by a semicolon: 650$aLord of the rings ;$v 3.
 5 - Save the records and check the links
 6 - Note punctuation is passed through
 7 - Save again, auth records are created again
 8 - Apply patch
 9 - Save again, new auth records are created again
10 - Check the records, punctuation has been removed
11 - Save again, no more records created.

Signed-off-by: Myka Kennedy Stephens <mkstephens@lancasterseminary.edu>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
2020-02-19 11:31:28 +00:00

332 lines
8.1 KiB
Perl

package C4::Heading::MARC21;
# Copyright (C) 2008 LibLime
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
use strict;
use warnings;
use MARC::Record;
use MARC::Field;
=head1 NAME
C4::Heading::MARC21
=head1 SYNOPSIS
use C4::Heading::MARC21;
=head1 DESCRIPTION
This is an internal helper class used by
C<C4::Heading> to parse headings data from
MARC21 records. Object of this type
do not carry data, instead, they only
dispatch functions.
=head1 DATA STRUCTURES
FIXME - this should be moved to a configuration file.
=head2 bib_heading_fields
=cut
my $bib_heading_fields = {
'100' => {
auth_type => 'PERSO_NAME',
subfields => 'abcdfghjklmnopqrst',
main_entry => 1
},
'110' => {
auth_type => 'CORPO_NAME',
subfields => 'abcdfghklmnoprst',
main_entry => 1
},
'111' => {
auth_type => 'MEETI_NAME',
subfields => 'acdfghjklnpqst',
main_entry => 1
},
'130' => {
auth_type => 'UNIF_TITLE',
subfields => 'adfghklmnoprst',
main_entry => 1
},
'440' => { auth_type => 'UNIF_TITLE', subfields => 'anp', series => 1 },
'600' => {
auth_type => 'PERSO_NAME',
subfields => 'abcdfghjklmnopqrstvxyz',
subject => 1
},
'610' => {
auth_type => 'CORPO_NAME',
subfields => 'abcdfghklmnoprstvxyz',
subject => 1
},
'611' => {
auth_type => 'MEETI_NAME',
subfields => 'acdfghjklnpqstvxyz',
subject => 1
},
'630' => {
auth_type => 'UNIF_TITLE',
subfields => 'adfghklmnoprstvxyz',
subject => 1
},
'648' => { auth_type => 'CHRON_TERM', subfields => 'avxyz', subject => 1 },
'650' => { auth_type => 'TOPIC_TERM', subfields => 'abvxyz', subject => 1 },
'651' => { auth_type => 'GEOGR_NAME', subfields => 'avxyz', subject => 1 },
'655' => { auth_type => 'GENRE/FORM', subfields => 'avxyz', subject => 1 },
'690' => { auth_type => 'TOPIC_TERM', subfields => 'abvxyz', subject => 1 },
'691' => { auth_type => 'GEOGR_NAME', subfields => 'avxyz', subject => 1 },
'696' => { auth_type => 'PERSO_NAME', subfields => 'abcdfghjklmnopqrst' },
'697' => { auth_type => 'CORPO_NAME', subfields => 'abcdfghklmnoprst' },
'698' => { auth_type => 'MEETI_NAME', subfields => 'acdfghjklnpqst' },
'699' => { auth_type => 'UNIF_TITLE', subfields => 'adfghklmnoprst' },
'700' => { auth_type => 'PERSO_NAME', subfields => 'abcdfghjklmnopqrst' },
'710' => { auth_type => 'CORPO_NAME', subfields => 'abcdfghklmnoprst' },
'711' => { auth_type => 'MEETI_NAME', subfields => 'acdfghjklnpqst' },
'730' => { auth_type => 'UNIF_TITLE', subfields => 'adfghklmnoprst' },
'800' => {
auth_type => 'PERSO_NAME',
subfields => 'abcdfghjklmnopqrst',
series => 1
},
'810' => {
auth_type => 'CORPO_NAME',
subfields => 'abcdfghklmnoprst',
series => 1
},
'811' =>
{ auth_type => 'MEETI_NAME', subfields => 'acdfghjklnpqst', series => 1 },
'830' =>
{ auth_type => 'UNIF_TITLE', subfields => 'adfghklmnoprst', series => 1 },
};
=head2 subdivisions
=cut
my %subdivisions = (
'v' => 'formsubdiv',
'x' => 'generalsubdiv',
'y' => 'chronologicalsubdiv',
'z' => 'geographicsubdiv',
);
=head1 METHODS
=head2 new
my $marc_handler = C4::Heading::MARC21->new();
=cut
sub new {
my $class = shift;
return bless {}, $class;
}
=head2 valid_bib_heading_tag
=cut
sub valid_bib_heading_tag {
my $self = shift;
my $tag = shift;
my $frameworkcode = shift;
if ( exists $bib_heading_fields->{$tag} ) {
return 1;
}
else {
return 0;
}
}
=head2 valid_bib_heading_subfield
=cut
sub valid_bib_heading_subfield {
my $self = shift;
my $tag = shift;
my $subfield = shift;
if ( exists $bib_heading_fields->{$tag} ) {
return 1 if ($bib_heading_fields->{$tag}->{subfields} =~ /$subfield/);
}
return 0;
}
=head2 parse_heading
=cut
sub parse_heading {
my $self = shift;
my $field = shift;
my $tag = $field->tag;
my $field_info = $bib_heading_fields->{$tag};
my $auth_type = $field_info->{'auth_type'};
my $thesaurus =
$tag =~ m/6../
? _get_subject_thesaurus($field)
: "lcsh"; # use 'lcsh' for names, UT, etc.
my $search_heading =
_get_search_heading( $field, $field_info->{'subfields'} );
my $display_heading =
_get_display_heading( $field, $field_info->{'subfields'} );
return ( $auth_type, $thesaurus, $search_heading, $display_heading,
'exact' );
}
=head1 INTERNAL FUNCTIONS
=head2 _get_subject_thesaurus
=cut
sub _get_subject_thesaurus {
my $field = shift;
my $ind2 = $field->indicator(2);
my $thesaurus = "notdefined";
if ( $ind2 eq '0' ) {
$thesaurus = "lcsh";
}
elsif ( $ind2 eq '1' ) {
$thesaurus = "lcac";
}
elsif ( $ind2 eq '2' ) {
$thesaurus = "mesh";
}
elsif ( $ind2 eq '3' ) {
$thesaurus = "nal";
}
elsif ( $ind2 eq '4' ) {
$thesaurus = "notspecified";
}
elsif ( $ind2 eq '5' ) {
$thesaurus = "cash";
}
elsif ( $ind2 eq '6' ) {
$thesaurus = "rvm";
}
elsif ( $ind2 eq '7' ) {
my $sf2 = $field->subfield('2');
$thesaurus = $sf2 if defined($sf2);
}
return $thesaurus;
}
=head2 _get_search_heading
=cut
sub _get_search_heading {
my $field = shift;
my $subfields = shift;
my $heading = "";
my @subfields = $field->subfields();
my $first = 1;
for ( my $i = 0 ; $i <= $#subfields ; $i++ ) {
my $code = $subfields[$i]->[0];
my $code_re = quotemeta $code;
my $value = $subfields[$i]->[1];
$value =~ s/[\s]*[-,.:=;!%\/][\s]*$//;
next unless $subfields =~ qr/$code_re/;
if ($first) {
$first = 0;
$heading = $value;
}
else {
if ( exists $subdivisions{$code} ) {
$heading .= " $subdivisions{$code} $value";
}
else {
$heading .= " $value";
}
}
}
# remove characters that are part of CCL syntax
$heading =~ s/[)(=]//g;
return $heading;
}
=head2 _get_display_heading
=cut
sub _get_display_heading {
my $field = shift;
my $subfields = shift;
my $heading = "";
my @subfields = $field->subfields();
my $first = 1;
for ( my $i = 0 ; $i <= $#subfields ; $i++ ) {
my $code = $subfields[$i]->[0];
my $code_re = quotemeta $code;
my $value = $subfields[$i]->[1];
next unless $subfields =~ qr/$code_re/;
if ($first) {
$first = 0;
$heading = $value;
}
else {
if ( exists $subdivisions{$code} ) {
$heading .= "--$value";
}
else {
$heading .= " $value";
}
}
}
return $heading;
}
# Additional limiters that we aren't using:
# if ($self->{'subject_added_entry'}) {
# $limiters .= " AND Heading-use-subject-added-entry=a";
# }
# if ($self->{'series_added_entry'}) {
# $limiters .= " AND Heading-use-series-added-entry=a";
# }
# if (not $self->{'subject_added_entry'} and not $self->{'series_added_entry'}) {
# $limiters .= " AND Heading-use-main-or-added-entry=a"
# }
=head1 AUTHOR
Koha Development Team <http://koha-community.org/>
Galen Charlton <galen.charlton@liblime.com>
=cut
1;