Tomas Cohen Arazi
4310e13a64
This patch makes the trivial change of making the linker script rely on LinkerRelink instead of CatalogModuleRelink, which is confusing and incorrect (according to both sysprefs descriptions). To test: 1. Verify that relinking is now tied to the right syspref Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io> Signed-off-by: Phil Ringnalda <phil@chetcolibrary.org> Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com> Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
341 lines
9.7 KiB
Perl
Executable file
341 lines
9.7 KiB
Perl
Executable file
#!/usr/bin/perl
|
|
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
use Modern::Perl;
|
|
|
|
use Koha::Script;
|
|
use C4::Context;
|
|
use C4::Biblio qw(
|
|
GetFrameworkCode
|
|
LinkBibHeadingsToAuthorities
|
|
ModBiblio
|
|
);
|
|
use Koha::Biblios;
|
|
use Getopt::Long qw( GetOptions );
|
|
use Pod::Usage qw( pod2usage );
|
|
use Time::HiRes qw( time );
|
|
use POSIX qw( ceil strftime );
|
|
use Module::Load::Conditional qw( can_load );
|
|
|
|
use Koha::Database;
|
|
use Koha::SearchEngine;
|
|
use Koha::SearchEngine::Indexer;
|
|
|
|
sub usage {
|
|
pod2usage( -verbose => 2 );
|
|
exit;
|
|
}
|
|
|
|
$| = 1;
|
|
|
|
# command-line parameters
|
|
my $verbose = 0;
|
|
my $link_report = 0;
|
|
my $test_only = 0;
|
|
my $want_help = 0;
|
|
my $auth_limit;
|
|
my $bib_limit;
|
|
my $commit = 100;
|
|
my $tagtolink;
|
|
my $allowrelink = C4::Context->preference("LinkerRelink") // '';
|
|
|
|
my $result = GetOptions(
|
|
'v|verbose' => \$verbose,
|
|
't|test' => \$test_only,
|
|
'l|link-report' => \$link_report,
|
|
'a|auth-limit=s' => \$auth_limit,
|
|
'b|bib-limit=s' => \$bib_limit,
|
|
'c|commit=i' => \$commit,
|
|
'g|tagtolink=i' => \$tagtolink,
|
|
'h|help' => \$want_help
|
|
);
|
|
|
|
binmode( STDOUT, ":encoding(UTF-8)" );
|
|
|
|
if ( not $result or $want_help ) {
|
|
usage();
|
|
}
|
|
|
|
my $linker_module =
|
|
"C4::Linker::" . ( C4::Context->preference("LinkerModule") || 'Default' );
|
|
unless ( can_load( modules => { $linker_module => undef } ) ) {
|
|
$linker_module = 'C4::Linker::Default';
|
|
unless ( can_load( modules => { $linker_module => undef } ) ) {
|
|
die "Unable to load linker module. Aborting.";
|
|
}
|
|
}
|
|
|
|
my $linker = $linker_module->new(
|
|
{
|
|
'auth_limit' => $auth_limit,
|
|
'options' => C4::Context->preference("LinkerOptions")
|
|
}
|
|
);
|
|
|
|
my $num_bibs_processed = 0;
|
|
my $num_bibs_modified = 0;
|
|
my $num_bad_bibs = 0;
|
|
my %unlinked_headings;
|
|
my %linked_headings;
|
|
my %fuzzy_headings;
|
|
my $dbh = C4::Context->dbh;
|
|
my @updated_biblios = ();
|
|
my $indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::BIBLIOS_INDEX });
|
|
|
|
my $schema = Koha::Database->schema;
|
|
$schema->txn_begin;
|
|
process_bibs( $linker, $bib_limit, $auth_limit, $commit, { tagtolink => $tagtolink, allowrelink => $allowrelink });
|
|
|
|
exit 0;
|
|
|
|
sub process_bibs {
|
|
my ( $linker, $bib_limit, $auth_limit, $commit, $args ) = @_;
|
|
my $tagtolink = $args->{tagtolink};
|
|
my $allowrelink = $args->{allowrelink};
|
|
my $bib_where = '';
|
|
my $starttime = time();
|
|
if ($bib_limit) {
|
|
$bib_where = "WHERE $bib_limit";
|
|
}
|
|
my $sql =
|
|
"SELECT biblionumber FROM biblio $bib_where ORDER BY biblionumber ASC";
|
|
my $sth = $dbh->prepare($sql);
|
|
$sth->execute();
|
|
my $linker_args = { tagtolink => $tagtolink, allowrelink => $allowrelink };
|
|
while ( my ($biblionumber) = $sth->fetchrow_array() ) {
|
|
$num_bibs_processed++;
|
|
process_bib( $linker, $biblionumber, $linker_args );
|
|
|
|
if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
|
|
print_progress_and_commit($num_bibs_processed);
|
|
}
|
|
}
|
|
|
|
if ( not $test_only ) {
|
|
$schema->txn_commit;
|
|
$indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
|
|
}
|
|
|
|
my $headings_linked = 0;
|
|
my $headings_unlinked = 0;
|
|
my $headings_fuzzy = 0;
|
|
for ( values %linked_headings ) { $headings_linked += $_; }
|
|
for ( values %unlinked_headings ) { $headings_unlinked += $_; }
|
|
for ( values %fuzzy_headings ) { $headings_fuzzy += $_; }
|
|
|
|
my $endtime = time();
|
|
my $totaltime = ceil (($endtime - $starttime) * 1000);
|
|
$starttime = strftime('%D %T', localtime($starttime));
|
|
$endtime = strftime('%D %T', localtime($endtime));
|
|
|
|
my $summary = <<_SUMMARY_;
|
|
|
|
Bib authority heading linking report
|
|
=======================================================
|
|
Linker module: $linker_module
|
|
Run started at: $starttime
|
|
Run ended at: $endtime
|
|
Total run time: $totaltime ms
|
|
Number of bibs checked: $num_bibs_processed
|
|
Number of bibs modified: $num_bibs_modified
|
|
Number of bibs with errors: $num_bad_bibs
|
|
Number of headings linked: $headings_linked
|
|
Number of headings unlinked: $headings_unlinked
|
|
Number of headings fuzzily linked: $headings_fuzzy
|
|
_SUMMARY_
|
|
$summary .= "\n**** Ran in test mode only ****\n" if $test_only;
|
|
print $summary;
|
|
|
|
if ($link_report) {
|
|
my @keys;
|
|
print <<_LINKED_HEADER_;
|
|
|
|
Linked headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_LINKED_HEADER_
|
|
|
|
@keys = sort {
|
|
$linked_headings{$b} <=> $linked_headings{$a} or "\L$a" cmp "\L$b"
|
|
} keys %linked_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $linked_headings{$key} . " occurrences\n";
|
|
}
|
|
|
|
print <<_UNLINKED_HEADER_;
|
|
|
|
Unlinked headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_UNLINKED_HEADER_
|
|
|
|
@keys = sort {
|
|
$unlinked_headings{$b} <=> $unlinked_headings{$a}
|
|
or "\L$a" cmp "\L$b"
|
|
} keys %unlinked_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $unlinked_headings{$key} . " occurrences\n";
|
|
}
|
|
|
|
print <<_FUZZY_HEADER_;
|
|
|
|
Fuzzily-matched headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_FUZZY_HEADER_
|
|
|
|
@keys = sort {
|
|
$fuzzy_headings{$b} <=> $fuzzy_headings{$a} or "\L$a" cmp "\L$b"
|
|
} keys %fuzzy_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $fuzzy_headings{$key} . " occurrences\n";
|
|
}
|
|
print $summary;
|
|
}
|
|
}
|
|
|
|
sub process_bib {
|
|
my $linker = shift;
|
|
my $biblionumber = shift;
|
|
my $args = shift;
|
|
my $tagtolink = $args->{tagtolink};
|
|
my $allowrelink = $args->{allowrelink};
|
|
my $biblio = Koha::Biblios->find($biblionumber);
|
|
my $record;
|
|
eval { $record = $biblio->metadata->record; };
|
|
unless ( defined $record ) {
|
|
warn "Could not retrieve bib $biblionumber from the database - record is corrupt.";
|
|
$num_bad_bibs++;
|
|
return;
|
|
}
|
|
|
|
my $frameworkcode = GetFrameworkCode($biblionumber);
|
|
|
|
my ( $headings_changed, $results );
|
|
|
|
eval {
|
|
( $headings_changed, $results ) =
|
|
LinkBibHeadingsToAuthorities( $linker, $record, $frameworkcode, $allowrelink, $tagtolink );
|
|
};
|
|
if ($@) {
|
|
warn "Error while searching for authorities for biblionumber $biblionumber at " . localtime(time);
|
|
$num_bad_bibs++;
|
|
return;
|
|
}
|
|
|
|
foreach my $key ( keys %{ $results->{'unlinked'} } ) {
|
|
$unlinked_headings{$key} += $results->{'unlinked'}->{$key};
|
|
}
|
|
foreach my $key ( keys %{ $results->{'linked'} } ) {
|
|
$linked_headings{$key} += $results->{'linked'}->{$key};
|
|
}
|
|
foreach my $key ( keys %{ $results->{'fuzzy'} } ) {
|
|
$fuzzy_headings{$key} += $results->{'fuzzy'}->{$key};
|
|
}
|
|
|
|
if ($headings_changed) {
|
|
if ($verbose) {
|
|
my $title = substr( $record->title, 0, 20 );
|
|
printf(
|
|
"Bib %12d (%-20s): %3d headings changed\n",
|
|
$biblionumber,
|
|
$title,
|
|
$headings_changed
|
|
);
|
|
}
|
|
if ( not $test_only ) {
|
|
ModBiblio( $record, $biblionumber, $frameworkcode, {
|
|
disable_autolink => 1,
|
|
skip_holds_queue => 1,
|
|
skip_record_index =>1
|
|
});
|
|
push @updated_biblios, $biblionumber;
|
|
#Last param is to note ModBiblio was called from linking script and bib should not be linked again
|
|
$num_bibs_modified++;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub print_progress_and_commit {
|
|
my $recs = shift;
|
|
$schema->txn_commit();
|
|
$indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
|
|
@updated_biblios = ();
|
|
$schema->txn_begin();
|
|
print "... processed $recs records\n";
|
|
}
|
|
|
|
=head1 NAME
|
|
|
|
link_bibs_to_authorities.pl
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
link_bibs_to_authorities.pl
|
|
link_bibs_to_authorities.pl -v
|
|
link_bibs_to_authorities.pl -l
|
|
link_bibs_to_authorities.pl --commit=1000
|
|
link_bibs_to_authorities.pl --auth-limit=STRING
|
|
link_bibs_to_authorities.pl --bib-limit=STRING
|
|
link_bibs_to_authorities.pl -g=700
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This batch job checks each bib record in the Koha database and attempts to link
|
|
each of its headings to the matching authority record.
|
|
|
|
=over 8
|
|
|
|
=item B<--help>
|
|
|
|
Prints this help
|
|
|
|
=item B<-v|--verbose>
|
|
|
|
Provide verbose log information (print the number of headings changed for each
|
|
bib record).
|
|
|
|
=item B<-l|--link-report>
|
|
|
|
Provide a report of all the headings that were processed: which were matched,
|
|
which were not, etc.
|
|
|
|
=item B<--auth-limit=S>
|
|
|
|
Only process those headings which match an authority record that matches the
|
|
user-specified WHERE clause.
|
|
|
|
=item B<--bib-limit=S>
|
|
|
|
Only process those bib records that match the user-specified WHERE clause.
|
|
|
|
=item B<--commit=N>
|
|
|
|
Commit the results to the database after every N records are processed.
|
|
|
|
=item B<-g=N>
|
|
|
|
Only process those headings found in MARC field N.
|
|
|
|
=item B<--test>
|
|
|
|
Only test the authority linking and report the results; do not change the bib
|
|
records.
|
|
|
|
=back
|
|
|
|
=cut
|