3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it
6 # under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or
8 # (at your option) any later version.
10 # Koha is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 LinkBibHeadingsToAuthorities
28 use Getopt::Long qw( GetOptions );
29 use Pod::Usage qw( pod2usage );
30 use Time::HiRes qw( time );
31 use POSIX qw( ceil strftime );
32 use Module::Load::Conditional qw( can_load );
35 use Koha::SearchEngine;
36 use Koha::SearchEngine::Indexer;
39 pod2usage( -verbose => 2 );
45 # command-line parameters
54 my $allowrelink = C4::Context->preference("LinkerRelink") // '';
56 my $result = GetOptions(
57 'v|verbose' => \$verbose,
58 't|test' => \$test_only,
59 'l|link-report' => \$link_report,
60 'a|auth-limit=s' => \$auth_limit,
61 'b|bib-limit=s' => \$bib_limit,
62 'c|commit=i' => \$commit,
63 'g|tagtolink=i' => \$tagtolink,
64 'h|help' => \$want_help
67 binmode( STDOUT, ":encoding(UTF-8)" );
69 if ( not $result or $want_help ) {
74 "C4::Linker::" . ( C4::Context->preference("LinkerModule") || 'Default' );
75 unless ( can_load( modules => { $linker_module => undef } ) ) {
76 $linker_module = 'C4::Linker::Default';
77 unless ( can_load( modules => { $linker_module => undef } ) ) {
78 die "Unable to load linker module. Aborting.";
82 my $linker = $linker_module->new(
84 'auth_limit' => $auth_limit,
85 'options' => C4::Context->preference("LinkerOptions")
89 my $num_bibs_processed = 0;
90 my $num_bibs_modified = 0;
92 my %unlinked_headings;
95 my $dbh = C4::Context->dbh;
96 my @updated_biblios = ();
97 my $indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::BIBLIOS_INDEX });
99 my $schema = Koha::Database->schema;
101 process_bibs( $linker, $bib_limit, $auth_limit, $commit, { tagtolink => $tagtolink, allowrelink => $allowrelink });
106 my ( $linker, $bib_limit, $auth_limit, $commit, $args ) = @_;
107 my $tagtolink = $args->{tagtolink};
108 my $allowrelink = $args->{allowrelink};
110 my $starttime = time();
112 $bib_where = "WHERE $bib_limit";
115 "SELECT biblionumber FROM biblio $bib_where ORDER BY biblionumber ASC";
116 my $sth = $dbh->prepare($sql);
118 my $linker_args = { tagtolink => $tagtolink, allowrelink => $allowrelink };
119 while ( my ($biblionumber) = $sth->fetchrow_array() ) {
120 $num_bibs_processed++;
121 process_bib( $linker, $biblionumber, $linker_args );
123 if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
124 print_progress_and_commit($num_bibs_processed);
128 if ( not $test_only ) {
130 $indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
133 my $headings_linked = 0;
134 my $headings_unlinked = 0;
135 my $headings_fuzzy = 0;
136 for ( values %linked_headings ) { $headings_linked += $_; }
137 for ( values %unlinked_headings ) { $headings_unlinked += $_; }
138 for ( values %fuzzy_headings ) { $headings_fuzzy += $_; }
140 my $endtime = time();
141 my $totaltime = ceil (($endtime - $starttime) * 1000);
142 $starttime = strftime('%D %T', localtime($starttime));
143 $endtime = strftime('%D %T', localtime($endtime));
145 my $summary = <<_SUMMARY_;
147 Bib authority heading linking report
148 =======================================================
149 Linker module: $linker_module
150 Run started at: $starttime
151 Run ended at: $endtime
152 Total run time: $totaltime ms
153 Number of bibs checked: $num_bibs_processed
154 Number of bibs modified: $num_bibs_modified
155 Number of bibs with errors: $num_bad_bibs
156 Number of headings linked: $headings_linked
157 Number of headings unlinked: $headings_unlinked
158 Number of headings fuzzily linked: $headings_fuzzy
160 $summary .= "\n**** Ran in test mode only ****\n" if $test_only;
165 print <<_LINKED_HEADER_;
167 Linked headings (from most frequent to least):
168 -------------------------------------------------------
173 $linked_headings{$b} <=> $linked_headings{$a} or "\L$a" cmp "\L$b"
174 } keys %linked_headings;
175 foreach my $key (@keys) {
176 print "$key:\t" . $linked_headings{$key} . " occurrences\n";
179 print <<_UNLINKED_HEADER_;
181 Unlinked headings (from most frequent to least):
182 -------------------------------------------------------
187 $unlinked_headings{$b} <=> $unlinked_headings{$a}
189 } keys %unlinked_headings;
190 foreach my $key (@keys) {
191 print "$key:\t" . $unlinked_headings{$key} . " occurrences\n";
194 print <<_FUZZY_HEADER_;
196 Fuzzily-matched headings (from most frequent to least):
197 -------------------------------------------------------
202 $fuzzy_headings{$b} <=> $fuzzy_headings{$a} or "\L$a" cmp "\L$b"
203 } keys %fuzzy_headings;
204 foreach my $key (@keys) {
205 print "$key:\t" . $fuzzy_headings{$key} . " occurrences\n";
213 my $biblionumber = shift;
215 my $tagtolink = $args->{tagtolink};
216 my $allowrelink = $args->{allowrelink};
217 my $biblio = Koha::Biblios->find($biblionumber);
219 eval { $record = $biblio->metadata->record; };
220 unless ( defined $record ) {
221 warn "Could not retrieve bib $biblionumber from the database - record is corrupt.";
226 my $frameworkcode = GetFrameworkCode($biblionumber);
228 my ( $headings_changed, $results );
231 ( $headings_changed, $results ) =
232 LinkBibHeadingsToAuthorities( $linker, $record, $frameworkcode, $allowrelink, $tagtolink );
235 warn "Error while searching for authorities for biblionumber $biblionumber at " . localtime(time);
240 foreach my $key ( keys %{ $results->{'unlinked'} } ) {
241 $unlinked_headings{$key} += $results->{'unlinked'}->{$key};
243 foreach my $key ( keys %{ $results->{'linked'} } ) {
244 $linked_headings{$key} += $results->{'linked'}->{$key};
246 foreach my $key ( keys %{ $results->{'fuzzy'} } ) {
247 $fuzzy_headings{$key} += $results->{'fuzzy'}->{$key};
250 if ($headings_changed) {
252 my $title = substr( $record->title, 0, 20 );
254 "Bib %12d (%-20s): %3d headings changed\n",
260 if ( not $test_only ) {
261 ModBiblio( $record, $biblionumber, $frameworkcode, {
262 disable_autolink => 1,
263 skip_holds_queue => 1,
264 skip_record_index =>1
266 push @updated_biblios, $biblionumber;
267 #Last param is to note ModBiblio was called from linking script and bib should not be linked again
268 $num_bibs_modified++;
273 sub print_progress_and_commit {
275 $schema->txn_commit();
276 $indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
277 @updated_biblios = ();
278 $schema->txn_begin();
279 print "... processed $recs records\n";
284 link_bibs_to_authorities.pl
288 link_bibs_to_authorities.pl
289 link_bibs_to_authorities.pl -v
290 link_bibs_to_authorities.pl -l
291 link_bibs_to_authorities.pl --commit=1000
292 link_bibs_to_authorities.pl --auth-limit=STRING
293 link_bibs_to_authorities.pl --bib-limit=STRING
294 link_bibs_to_authorities.pl -g=700
298 This batch job checks each bib record in the Koha database and attempts to link
299 each of its headings to the matching authority record.
307 =item B<-v|--verbose>
309 Provide verbose log information (print the number of headings changed for each
312 =item B<-l|--link-report>
314 Provide a report of all the headings that were processed: which were matched,
317 =item B<--auth-limit=S>
319 Only process those headings which match an authority record that matches the
320 user-specified WHERE clause.
322 =item B<--bib-limit=S>
324 Only process those bib records that match the user-specified WHERE clause.
328 Commit the results to the database after every N records are processed.
332 Only process those headings found in MARC field N.
336 Only test the authority linking and report the results; do not change the bib