9f4cd427db
In UNIMARC instances, the run time of link_bibs_to_authorities.pl
can be reduced by up to 80% and the number of DBI calls
can be reduced by up to 90% with a very simple fix that
optimises the constructor of the C4::Heading::UNIMARC object.
Currently, the constructor resets the $bib_heading_fields hash
*in each invocation* (i.e. for every field the bibliographic
record contains) and then populates it again with the results
fetched from the database! This is inefficient.
The patch/fix is trivial: we take advantage of the fact that
$bib_heading_fields is declared with "my" at the top of the
C4::Heading::UNIMARC module and is thus a file-scoped lexical
variable that keeps its value for the entire execution of the
program (more info here: https://stackoverflow.com/q/75317862).
Placing the section that generates the $bib_heading_fields
hash inside a "unless ( defined $bib_heading_fields )" code
block is enough to cause a significant reduction in the
number of "expensive" SQL SELECT queries that must be run.
Test plan:
0) Have a UNIMARC instance with some sample data (the KTD one
will do just fine for this experiment).
1) Run the following commands:
$ ktd --shell
k$ DBI_PROFILE=1 ./misc/link_bibs_to_authorities.pl -t
Observe the output from the script and the DBI profiling info.
[You may want to play with different DBI_PROFILE levels (such as
2, 4, 6, 8, etc.) to see what's going on under the hood DBI-wise,
for reference see: https://metacpan.org/pod/DBI::Profile]
2) Apply this patch.
3) Rerun the script from step 1), it should run a lot faster!
Signed-off-by: David Nind <david@davidnind.com>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
(cherry picked from commit fe18b05692
)
Signed-off-by: Fridolin Somers <fridolin.somers@biblibre.com>
226 lines
4.9 KiB
Perl
226 lines
4.9 KiB
Perl
package C4::Heading::UNIMARC;
|
|
|
|
# Copyright (C) 2011 C & P Bibliography Services
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
use 5.010;
|
|
use strict;
|
|
use warnings;
|
|
use MARC::Field;
|
|
use C4::Context;
|
|
|
|
|
|
=head1 NAME
|
|
|
|
C4::Heading::UNIMARC
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use C4::Heading::UNIMARC;
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is an internal helper class used by
C<C4::Heading> to parse headings data from
UNIMARC records. Objects of this type
do not carry data; instead, they only
dispatch functions.
|
|
|
|
=head1 DATA STRUCTURES
|
|
|
|
FIXME - this should be moved to a configuration file.
|
|
|
|
=head2 subdivisions
|
|
|
|
=cut
|
|
|
|
# Subfield codes that denote a subdivision, mapped to the keyword that
# _get_search_heading() inserts in front of the subfield value. In
# _get_display_heading() the same codes cause the value to be joined
# with "--" instead of a space.
my %subdivisions = (
    j => 'formsubdiv',
    x => 'generalsubdiv',
    y => 'geographicsubdiv',
    z => 'chronologicalsubdiv',
);

# File-scoped cache: tag => { auth_type => ..., subfields => ... }.
# Stays undefined until the first call to new() fills it from the database.
my $bib_heading_fields;
|
|
|
|
=head1 METHODS
|
|
|
|
=head2 new
|
|
|
|
my $marc_handler = C4::Heading::UNIMARC->new();
|
|
|
|
=cut
|
|
|
|
# Constructor. Returns an empty object blessed into $class; the object
# carries no data of its own.
#
# On the first call only, the file-scoped $bib_heading_fields cache is
# loaded from marc_subfield_structure (default framework, rows that have
# an authtypecode). Later calls reuse the cache and issue no SQL.
#
# NOTE(review): the cache is never refreshed for the lifetime of the
# process; confirm this is acceptable for long-running (persistent)
# deployments where the framework may be edited.
sub new {
    my $class = shift;

    unless ( defined $bib_heading_fields ) {
        my $dbh = C4::Context->dbh;
        my $sth = $dbh->prepare(
            "SELECT tagfield, authtypecode
             FROM marc_subfield_structure
             WHERE frameworkcode = '' AND authtypecode <> ''"
        );
        $sth->execute();

        # Build into a local hash and publish it only once the loop has
        # completed: if fetching dies part-way (e.g. with RaiseError on),
        # the cache stays undefined and the next call retries, instead of
        # a partially filled cache being kept forever.
        my %heading_fields;
        while ( my ( $tag, $auth_type ) = $sth->fetchrow_array ) {
            $heading_fields{$tag} = {
                auth_type => $auth_type,
                subfields => 'abcdefghjklmnopqrstvxyz',
            };
        }
        $bib_heading_fields = \%heading_fields;
    }

    return bless {}, $class;
}
|
|
|
|
=head2 valid_heading_tag
|
|
|
|
=cut
|
|
|
|
# Look up $tag in the heading-field cache.
# Returns the cached entry (a hashref with 'auth_type' and 'subfields')
# when the tag is linked to an authority type, otherwise undef.
sub valid_heading_tag {
    my $self = shift;
    my $tag  = shift;

    return $bib_heading_fields->{$tag};
}
|
|
|
|
=head2 valid_heading_subfield
|
|
|
|
=cut
|
|
|
|
# Tell whether $subfield is one of the subfield codes that take part in
# the heading for $tag.
#
#   $tag      - bibliographic field tag
#   $subfield - subfield code to test
#
# Returns 1 when $tag is a known heading field and $subfield occurs in
# its list of heading subfields, 0 otherwise (including when either
# argument is undefined or $subfield is empty).
sub valid_heading_subfield {
    my ( $self, $tag, $subfield ) = @_;

    return 0 unless defined $tag;
    return 0 unless defined $subfield && length $subfield;

    # Checking definedness first avoids autovivifying the cache into an
    # empty (but defined) hash, which would stop new() from loading it.
    return 0 unless defined $bib_heading_fields;
    return 0 unless exists $bib_heading_fields->{$tag};

    # Literal containment test. The code must not be used as a regex:
    # '.' would match every code, '[' would die at regex compile time and
    # an empty pattern would reuse the last successfully matched regex.
    my $allowed = $bib_heading_fields->{$tag}->{subfields};
    return index( $allowed, $subfield ) >= 0 ? 1 : 0;
}
|
|
|
|
=head2 parse_heading
|
|
|
|
=cut
|
|
|
|
# Build the heading data for one bibliographic field.
#
#   $field - object providing tag() and subfields()
#
# Returns a five-element list:
#   ( authority type code, undef, search heading, display heading, 'exact' )
# The authority type and the list of usable subfield codes come from the
# cache entry for the field's tag.
sub parse_heading {
    my $self  = shift;
    my $field = shift;

    my $rule          = $bib_heading_fields->{ $field->tag };
    my $auth_type     = $rule->{'auth_type'};
    my $allowed_codes = $rule->{'subfields'};

    my $search_heading  = _get_search_heading( $field, $allowed_codes );
    my $display_heading = _get_display_heading( $field, $allowed_codes );

    return ( $auth_type, undef, $search_heading, $display_heading, 'exact' );
}
|
|
|
|
=head1 INTERNAL FUNCTIONS
|
|
|
|
=head2 _get_subject_thesaurus
|
|
|
|
=cut
|
|
|
|
# Return the content of subfield '2' of $field, or the string
# "notdefined" when the field has no such subfield.
sub _get_subject_thesaurus {
    my ($field) = @_;

    # Defined-or, so that an empty or "0" value is still returned as-is.
    return $field->subfield('2') // "notdefined";
}
|
|
|
|
=head2 _get_search_heading
|
|
|
|
=cut
|
|
|
|
# Assemble the heading string used for searching.
#
#   $field     - object whose subfields() returns [ code, value ] pairs
#   $subfields - string of the subfield codes to take into account
#
# Every value first loses one trailing punctuation character
# (one of - , . : = ; ! % /) together with the whitespace around it.
# Values whose code is not listed in $subfields are skipped. The first
# retained value starts the heading; each later one is appended after a
# space, preceded by its subdivision keyword when the code is a
# subdivision code. Finally the characters ')', '(' and '=' are removed.
sub _get_search_heading {
    my ( $field, $subfields ) = @_;

    my $heading = "";
    my $at_start = 1;

    foreach my $pair ( $field->subfields() ) {
        my ( $code, $value ) = @{$pair};
        my $code_re = quotemeta $code;

        $value =~ s/[\s]*[-,.:=;!%\/][\s]*$//;
        next unless $subfields =~ qr/$code_re/;

        if ($at_start) {
            $heading  = $value;
            $at_start = 0;
            next;
        }

        my $joiner =
            exists $subdivisions{$code}
            ? " $subdivisions{$code} "
            : " ";
        $heading .= $joiner . $value;
    }

    # remove characters that are part of CCL syntax
    $heading =~ s/[)(=]//g;

    return $heading;
}
|
|
|
|
=head2 _get_display_heading
|
|
|
|
=cut
|
|
|
|
# Assemble the heading string used for display.
#
#   $field     - object whose subfields() returns [ code, value ] pairs
#   $subfields - string of the subfield codes to take into account
#
# Values whose code is not listed in $subfields are skipped. The first
# retained value starts the heading; each later one is appended with
# "--" when its code is a subdivision code and with a single space
# otherwise. Values are used unmodified.
sub _get_display_heading {
    my ( $field, $subfields ) = @_;

    my $heading  = "";
    my $at_start = 1;

    foreach my $pair ( $field->subfields() ) {
        my ( $code, $value ) = @{$pair};
        my $code_re = quotemeta $code;

        next unless $subfields =~ qr/$code_re/;

        if ($at_start) {
            $heading  = $value;
            $at_start = 0;
            next;
        }

        my $joiner = exists $subdivisions{$code} ? "--" : " ";
        $heading .= $joiner . $value;
    }

    return $heading;
}
|
|
|
|
=head1 AUTHOR
|
|
|
|
Koha Development Team <http://koha-community.org/>
|
|
|
|
Jared Camins-Esakov <jcamins@cpbibliography.com>
|
|
|
|
=cut
|
|
|
|
1;
|