Jonathan Druart
9d6d641d1f
On bug 17591 we discovered that there was something weird going on with the way we export and use subroutines/modules. This patch tries to standardize our EXPORT to use EXPORT_OK only. That way we will need to explicitely define the subroutine we want to use from a module. This patch is a squashed version of: Bug 17600: After export.pl Bug 17600: After perlimport Bug 17600: Manual changes Bug 17600: Other manual changes after second perlimports run Bug 17600: Fix tests And a lot of other manual changes. export.pl is a dirty script that can be found on bug 17600. "perlimport" is: git clone https://github.com/oalders/App-perlimports.git cd App-perlimports/ cpanm --installdeps . export PERL5LIB="$PERL5LIB:/kohadevbox/koha/App-perlimports/lib" find . \( -name "*.pl" -o -name "*.pm" \) -exec perl App-perlimports/script/perlimports --inplace-edit --no-preserve-unused --filename {} \; The ideas of this patch are to: * use EXPORT_OK instead of EXPORT * perltidy the EXPORT_OK list * remove '&' before the subroutine names * remove some uneeded use statements * explicitely import the subroutines we need within the controllers or modules Note that the private subroutines (starting with _) should not be exported (and not used from outside of the module except from tests). EXPORT vs EXPORT_OK (from https://www.thegeekstuff.com/2010/06/perl-exporter-examples/) """ Export allows to export the functions and variables of modules to user’s namespace using the standard import method. This way, we don’t need to create the objects for the modules to access it’s members. @EXPORT and @EXPORT_OK are the two main variables used during export operation. @EXPORT contains list of symbols (subroutines and variables) of the module to be exported into the caller namespace. @EXPORT_OK does export of symbols on demand basis. """ If this patch caused a conflict with a patch you wrote prior to its push: * Make sure you are not reintroducing a "use" statement that has been removed * "$subroutine" is not exported by the C4::$MODULE module means that you need to add the subroutine to the @EXPORT_OK list * Bareword "$subroutine" not allowed while "strict subs" means that you didn't imported the subroutine from the module: - use $MODULE qw( $subroutine list ); You can also use the fully qualified namespace: C4::$MODULE::$subroutine Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
308 lines
8.3 KiB
Perl
Executable file
308 lines
8.3 KiB
Perl
Executable file
#!/usr/bin/perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
BEGIN {
|
|
|
|
# find Koha's Perl modules
|
|
# test carefully before changing this
|
|
use FindBin ();
|
|
eval { require "$FindBin::Bin/kohalib.pl" };
|
|
}
|
|
|
|
use Koha::Script;
|
|
use C4::Context;
|
|
use C4::Biblio qw(
|
|
GetFrameworkCode
|
|
GetMarcBiblio
|
|
LinkBibHeadingsToAuthorities
|
|
ModBiblio
|
|
);
|
|
use Getopt::Long qw( GetOptions );
|
|
use Pod::Usage qw( pod2usage );
|
|
use Time::HiRes qw( time );
|
|
use POSIX qw( ceil strftime );
|
|
use Module::Load::Conditional qw( can_load );
|
|
|
|
sub usage {
|
|
pod2usage( -verbose => 2 );
|
|
exit;
|
|
}
|
|
|
|
$| = 1;
|
|
|
|
# command-line parameters
|
|
my $verbose = 0;
|
|
my $link_report = 0;
|
|
my $test_only = 0;
|
|
my $want_help = 0;
|
|
my $auth_limit;
|
|
my $bib_limit;
|
|
my $commit = 100;
|
|
my $tagtolink;
|
|
my $allowrelink = C4::Context->preference("CatalogModuleRelink") || '';
|
|
|
|
my $result = GetOptions(
|
|
'v|verbose' => \$verbose,
|
|
't|test' => \$test_only,
|
|
'l|link-report' => \$link_report,
|
|
'a|auth-limit=s' => \$auth_limit,
|
|
'b|bib-limit=s' => \$bib_limit,
|
|
'c|commit=i' => \$commit,
|
|
'g|tagtolink=i' => \$tagtolink,
|
|
'h|help' => \$want_help
|
|
);
|
|
|
|
binmode( STDOUT, ":encoding(UTF-8)" );
|
|
|
|
if ( not $result or $want_help ) {
|
|
usage();
|
|
}
|
|
|
|
my $linker_module =
|
|
"C4::Linker::" . ( C4::Context->preference("LinkerModule") || 'Default' );
|
|
unless ( can_load( modules => { $linker_module => undef } ) ) {
|
|
$linker_module = 'C4::Linker::Default';
|
|
unless ( can_load( modules => { $linker_module => undef } ) ) {
|
|
die "Unable to load linker module. Aborting.";
|
|
}
|
|
}
|
|
|
|
my $linker = $linker_module->new(
|
|
{
|
|
'auth_limit' => $auth_limit,
|
|
'options' => C4::Context->preference("LinkerOptions")
|
|
}
|
|
);
|
|
|
|
my $num_bibs_processed = 0;
|
|
my $num_bibs_modified = 0;
|
|
my $num_bad_bibs = 0;
|
|
my %unlinked_headings;
|
|
my %linked_headings;
|
|
my %fuzzy_headings;
|
|
my $dbh = C4::Context->dbh;
|
|
$dbh->{AutoCommit} = 0;
|
|
process_bibs( $linker, $bib_limit, $auth_limit, $commit, { tagtolink => $tagtolink, allowrelink => $allowrelink });
|
|
$dbh->commit();
|
|
|
|
exit 0;
|
|
|
|
sub process_bibs {
|
|
my ( $linker, $bib_limit, $auth_limit, $commit, $args ) = @_;
|
|
my $tagtolink = $args->{tagtolink};
|
|
my $allowrelink = $args->{allowrelink};
|
|
my $bib_where = '';
|
|
my $starttime = time();
|
|
if ($bib_limit) {
|
|
$bib_where = "WHERE $bib_limit";
|
|
}
|
|
my $sql =
|
|
"SELECT biblionumber FROM biblio $bib_where ORDER BY biblionumber ASC";
|
|
my $sth = $dbh->prepare($sql);
|
|
$sth->execute();
|
|
my $linker_args = { tagtolink => $tagtolink, allowrelink => $allowrelink };
|
|
while ( my ($biblionumber) = $sth->fetchrow_array() ) {
|
|
$num_bibs_processed++;
|
|
process_bib( $linker, $biblionumber, $linker_args );
|
|
|
|
if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
|
|
print_progress_and_commit($num_bibs_processed);
|
|
}
|
|
}
|
|
|
|
if ( not $test_only ) {
|
|
$dbh->commit;
|
|
}
|
|
|
|
my $headings_linked = 0;
|
|
my $headings_unlinked = 0;
|
|
my $headings_fuzzy = 0;
|
|
for ( values %linked_headings ) { $headings_linked += $_; }
|
|
for ( values %unlinked_headings ) { $headings_unlinked += $_; }
|
|
for ( values %fuzzy_headings ) { $headings_fuzzy += $_; }
|
|
|
|
my $endtime = time();
|
|
my $totaltime = ceil (($endtime - $starttime) * 1000);
|
|
$starttime = strftime('%D %T', localtime($starttime));
|
|
$endtime = strftime('%D %T', localtime($endtime));
|
|
|
|
my $summary = <<_SUMMARY_;
|
|
|
|
Bib authority heading linking report
|
|
=======================================================
|
|
Linker module: $linker_module
|
|
Run started at: $starttime
|
|
Run ended at: $endtime
|
|
Total run time: $totaltime ms
|
|
Number of bibs checked: $num_bibs_processed
|
|
Number of bibs modified: $num_bibs_modified
|
|
Number of bibs with errors: $num_bad_bibs
|
|
Number of headings linked: $headings_linked
|
|
Number of headings unlinked: $headings_unlinked
|
|
Number of headings fuzzily linked: $headings_fuzzy
|
|
_SUMMARY_
|
|
$summary .= "\n**** Ran in test mode only ****\n" if $test_only;
|
|
print $summary;
|
|
|
|
if ($link_report) {
|
|
my @keys;
|
|
print <<_LINKED_HEADER_;
|
|
|
|
Linked headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_LINKED_HEADER_
|
|
|
|
@keys = sort {
|
|
$linked_headings{$b} <=> $linked_headings{$a} or "\L$a" cmp "\L$b"
|
|
} keys %linked_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $linked_headings{$key} . " occurrences\n";
|
|
}
|
|
|
|
print <<_UNLINKED_HEADER_;
|
|
|
|
Unlinked headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_UNLINKED_HEADER_
|
|
|
|
@keys = sort {
|
|
$unlinked_headings{$b} <=> $unlinked_headings{$a}
|
|
or "\L$a" cmp "\L$b"
|
|
} keys %unlinked_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $unlinked_headings{$key} . " occurrences\n";
|
|
}
|
|
|
|
print <<_FUZZY_HEADER_;
|
|
|
|
Fuzzily-matched headings (from most frequent to least):
|
|
-------------------------------------------------------
|
|
|
|
_FUZZY_HEADER_
|
|
|
|
@keys = sort {
|
|
$fuzzy_headings{$b} <=> $fuzzy_headings{$a} or "\L$a" cmp "\L$b"
|
|
} keys %fuzzy_headings;
|
|
foreach my $key (@keys) {
|
|
print "$key:\t" . $fuzzy_headings{$key} . " occurrences\n";
|
|
}
|
|
print $summary;
|
|
}
|
|
}
|
|
|
|
sub process_bib {
|
|
my $linker = shift;
|
|
my $biblionumber = shift;
|
|
my $args = shift;
|
|
my $tagtolink = $args->{tagtolink};
|
|
my $allowrelink = $args->{allowrelink};
|
|
my $bib = GetMarcBiblio({ biblionumber => $biblionumber });
|
|
unless ( defined $bib ) {
|
|
print
|
|
"\nCould not retrieve bib $biblionumber from the database - record is corrupt.\n";
|
|
$num_bad_bibs++;
|
|
return;
|
|
}
|
|
|
|
my $frameworkcode = GetFrameworkCode($biblionumber);
|
|
|
|
my ( $headings_changed, $results ) =
|
|
LinkBibHeadingsToAuthorities( $linker, $bib, $frameworkcode, $allowrelink, $tagtolink );
|
|
foreach my $key ( keys %{ $results->{'unlinked'} } ) {
|
|
$unlinked_headings{$key} += $results->{'unlinked'}->{$key};
|
|
}
|
|
foreach my $key ( keys %{ $results->{'linked'} } ) {
|
|
$linked_headings{$key} += $results->{'linked'}->{$key};
|
|
}
|
|
foreach my $key ( keys %{ $results->{'fuzzy'} } ) {
|
|
$fuzzy_headings{$key} += $results->{'fuzzy'}->{$key};
|
|
}
|
|
|
|
if ($headings_changed) {
|
|
if ($verbose) {
|
|
my $title = substr( $bib->title, 0, 20 );
|
|
printf(
|
|
"Bib %12d (%-20s): %3d headings changed\n",
|
|
$biblionumber,
|
|
$title,
|
|
$headings_changed
|
|
);
|
|
}
|
|
if ( not $test_only ) {
|
|
ModBiblio( $bib, $biblionumber, $frameworkcode, 1 );
|
|
#Last param is to note ModBiblio was called from linking script and bib should not be linked again
|
|
$num_bibs_modified++;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub print_progress_and_commit {
|
|
my $recs = shift;
|
|
$dbh->commit();
|
|
print "... processed $recs records\n";
|
|
}
|
|
|
|
=head1 NAME
|
|
|
|
link_bibs_to_authorities.pl
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
link_bibs_to_authorities.pl
|
|
link_bibs_to_authorities.pl -v
|
|
link_bibs_to_authorities.pl -l
|
|
link_bibs_to_authorities.pl --commit=1000
|
|
link_bibs_to_authorities.pl --auth-limit=STRING
|
|
link_bibs_to_authorities.pl --bib-limit=STRING
|
|
link_bibs_to_authorities.pl -g=700
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This batch job checks each bib record in the Koha database and attempts to link
|
|
each of its headings to the matching authority record.
|
|
|
|
=over 8
|
|
|
|
=item B<--help>
|
|
|
|
Prints this help
|
|
|
|
=item B<-v|--verbose>
|
|
|
|
Provide verbose log information (print the number of headings changed for each
|
|
bib record).
|
|
|
|
=item B<-l|--link-report>
|
|
|
|
Provide a report of all the headings that were processed: which were matched,
|
|
which were not, etc.
|
|
|
|
=item B<--auth-limit=S>
|
|
|
|
Only process those headings which match an authority record that matches the
|
|
user-specified WHERE clause.
|
|
|
|
=item B<--bib-limit=S>
|
|
|
|
Only process those bib records that match the user-specified WHERE clause.
|
|
|
|
=item B<--commit=N>
|
|
|
|
Commit the results to the database after every N records are processed.
|
|
|
|
=item B<-g=N>
|
|
|
|
Only process those headings found in MARC field N.
|
|
|
|
=item B<--test>
|
|
|
|
Only test the authority linking and report the results; do not change the bib
|
|
records.
|
|
|
|
=back
|
|
|
|
=cut
|