10 LinkBibHeadingsToAuthorities
14 use Getopt::Long qw( GetOptions );
15 use Pod::Usage qw( pod2usage );
16 use Time::HiRes qw( time );
17 use POSIX qw( ceil strftime );
18 use Module::Load::Conditional qw( can_load );
20 use Koha::SearchEngine;
21 use Koha::SearchEngine::Indexer;
# NOTE(review): this excerpt is missing lines from the original script (the
# embedded numbering skips), so several statements below appear without their
# opening/closing lines or their variable declarations.
# Print the script's full POD as usage text (Pod::Usage at verbosity 2).
24 pod2usage( -verbose => 2 );
30 # command-line parameters
# Relink setting read from the CatalogModuleRelink preference; falls back to
# the empty string when the preference is unset or false.
39 my $allowrelink = C4::Context->preference("CatalogModuleRelink") || '';
# Parse command-line switches; $result is false when parsing failed.
41 my $result = GetOptions(
42 'v|verbose' => \$verbose,
43 't|test' => \$test_only,
44 'l|link-report' => \$link_report,
45 'a|auth-limit=s' => \$auth_limit,
46 'b|bib-limit=s' => \$bib_limit,
47 'c|commit=i' => \$commit,
48 'g|tagtolink=i' => \$tagtolink,
49 'h|help' => \$want_help
# Report output is written to STDOUT encoded as UTF-8.
52 binmode( STDOUT, ":encoding(UTF-8)" );
# Invalid options or an explicit help request (the branch body is not visible
# in this excerpt).
54 if ( not $result or $want_help ) {
# The linker class name is built from the LinkerModule preference, defaulting
# to 'Default'. If that class cannot be loaded, C4::Linker::Default is tried;
# if that fails as well the script dies.
59 "C4::Linker::" . ( C4::Context->preference("LinkerModule") || 'Default' );
60 unless ( can_load( modules => { $linker_module => undef } ) ) {
61 $linker_module = 'C4::Linker::Default';
62 unless ( can_load( modules => { $linker_module => undef } ) ) {
63 die "Unable to load linker module. Aborting.";
# Instantiate the linker with the authority limit and the LinkerOptions
# preference value.
67 my $linker = $linker_module->new(
69 'auth_limit' => $auth_limit,
70 'options' => C4::Context->preference("LinkerOptions")
# File-scoped counters and state shared with the subs below.
74 my $num_bibs_processed = 0;
75 my $num_bibs_modified = 0;
77 my %unlinked_headings;
80 my $dbh = C4::Context->dbh;
# Biblionumbers modified since the last indexing batch.
81 my @updated_biblios = ();
82 my $indexer = Koha::SearchEngine::Indexer->new({ index => $Koha::SearchEngine::BIBLIOS_INDEX });
# Turn off DBI AutoCommit for the run. NOTE(review): presumably changes are
# committed in batches by print_progress_and_commit; the commit call itself is
# not visible in this excerpt.
84 $dbh->{AutoCommit} = 0;
85 process_bibs( $linker, $bib_limit, $auth_limit, $commit, { tagtolink => $tagtolink, allowrelink => $allowrelink });
# process_bibs (the sub header is not visible in this excerpt): walk the
# biblio table in biblionumber order, run process_bib on every row, then print
# a summary report.
# Arguments:
#   $linker     - linker object passed through to process_bib
#   $bib_limit  - SQL condition used to restrict the biblio rows
#   $auth_limit - unpacked here but not used in the visible lines
#   $commit     - batch size for progress/commit
#   $args       - hashref carrying 'tagtolink' and 'allowrelink'
91 my ( $linker, $bib_limit, $auth_limit, $commit, $args ) = @_;
92 my $tagtolink = $args->{tagtolink};
93 my $allowrelink = $args->{allowrelink};
95 my $starttime = time();
# NOTE(review): $bib_limit is interpolated verbatim into the SQL text below.
# The POD describes --bib-limit as a user-specified WHERE clause, so this
# looks intentional, but it means the value must come from a trusted operator.
97 $bib_where = "WHERE $bib_limit";
100 "SELECT biblionumber FROM biblio $bib_where ORDER BY biblionumber ASC";
101 my $sth = $dbh->prepare($sql);
103 my $linker_args = { tagtolink => $tagtolink, allowrelink => $allowrelink };
104 while ( my ($biblionumber) = $sth->fetchrow_array() ) {
105 $num_bibs_processed++;
106 process_bib( $linker, $biblionumber, $linker_args );
# Outside test mode, report progress and flush every $commit records.
# NOTE(review): --commit=0 is accepted by the 'commit=i' option spec and would
# make this modulus die ("Illegal modulus zero") - confirm $commit is
# validated elsewhere.
108 if ( not $test_only and ( $num_bibs_processed % $commit ) == 0 ) {
109 print_progress_and_commit($num_bibs_processed);
# Outside test mode, hand the remaining updated biblionumbers to the indexer.
113 if ( not $test_only ) {
114 $indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
# Sum the per-heading counters into totals for the report.
118 my $headings_linked = 0;
119 my $headings_unlinked = 0;
120 my $headings_fuzzy = 0;
121 for ( values %linked_headings ) { $headings_linked += $_; }
122 for ( values %unlinked_headings ) { $headings_unlinked += $_; }
123 for ( values %fuzzy_headings ) { $headings_fuzzy += $_; }
# Elapsed time in milliseconds, rounded up. The two timestamps are then
# overwritten with formatted local-time strings for display.
125 my $endtime = time();
126 my $totaltime = ceil (($endtime - $starttime) * 1000);
127 $starttime = strftime('%D %T', localtime($starttime));
128 $endtime = strftime('%D %T', localtime($endtime));
# Build the summary text as a heredoc.
130 my $summary = <<_SUMMARY_;
132 Bib authority heading linking report
133 =======================================================
134 Linker module: $linker_module
135 Run started at: $starttime
136 Run ended at: $endtime
137 Total run time: $totaltime ms
138 Number of bibs checked: $num_bibs_processed
139 Number of bibs modified: $num_bibs_modified
140 Number of bibs with errors: $num_bad_bibs
141 Number of headings linked: $headings_linked
142 Number of headings unlinked: $headings_unlinked
143 Number of headings fuzzily linked: $headings_fuzzy
145 $summary .= "\n**** Ran in test mode only ****\n" if $test_only;
# Per-heading listings follow: linked, unlinked, then fuzzily matched. Each is
# sorted by occurrence count, highest first; the linked and fuzzy listings
# visibly break ties by lowercased heading text.
# NOTE(review): presumably these are printed only when --link-report is set;
# the guard is not visible in this excerpt.
150 print <<_LINKED_HEADER_;
152 Linked headings (from most frequent to least):
153 -------------------------------------------------------
158 $linked_headings{$b} <=> $linked_headings{$a} or "\L$a" cmp "\L$b"
159 } keys %linked_headings;
160 foreach my $key (@keys) {
161 print "$key:\t" . $linked_headings{$key} . " occurrences\n";
164 print <<_UNLINKED_HEADER_;
166 Unlinked headings (from most frequent to least):
167 -------------------------------------------------------
172 $unlinked_headings{$b} <=> $unlinked_headings{$a}
174 } keys %unlinked_headings;
175 foreach my $key (@keys) {
176 print "$key:\t" . $unlinked_headings{$key} . " occurrences\n";
179 print <<_FUZZY_HEADER_;
181 Fuzzily-matched headings (from most frequent to least):
182 -------------------------------------------------------
187 $fuzzy_headings{$b} <=> $fuzzy_headings{$a} or "\L$a" cmp "\L$b"
188 } keys %fuzzy_headings;
189 foreach my $key (@keys) {
190 print "$key:\t" . $fuzzy_headings{$key} . " occurrences\n";
# process_bib (the sub header is not visible in this excerpt): run the linker
# over one bibliographic record, add its per-heading results to the file-level
# counters, and save the record when headings changed and the run is not in
# test mode.
198 my $biblionumber = shift;
200 my $tagtolink = $args->{tagtolink};
201 my $allowrelink = $args->{allowrelink};
# NOTE(review): the result of find() is dereferenced immediately. If find()
# can return undef, or if ->record can throw on bad metadata, the
# defined-check below would never be reached - confirm and guard if so.
202 my $biblio = Koha::Biblios->find($biblionumber);
203 my $record = $biblio->metadata->record;
204 unless ( defined $record ) {
206 "\nCould not retrieve bib $biblionumber from the database - record is corrupt.\n";
211 my $frameworkcode = GetFrameworkCode($biblionumber);
# The linker call returns a value used below as a "headings changed" flag and
# a hashref whose 'unlinked', 'linked' and 'fuzzy' entries map heading keys to
# amounts that are added to the running counters.
213 my ( $headings_changed, $results ) =
214 LinkBibHeadingsToAuthorities( $linker, $record, $frameworkcode, $allowrelink, $tagtolink );
215 foreach my $key ( keys %{ $results->{'unlinked'} } ) {
216 $unlinked_headings{$key} += $results->{'unlinked'}->{$key};
218 foreach my $key ( keys %{ $results->{'linked'} } ) {
219 $linked_headings{$key} += $results->{'linked'}->{$key};
221 foreach my $key ( keys %{ $results->{'fuzzy'} } ) {
222 $fuzzy_headings{$key} += $results->{'fuzzy'}->{$key};
225 if ($headings_changed) {
# Title cut to its first 20 characters to fit the log line's %-20s column.
227 my $title = substr( $record->title, 0, 20 );
229 "Bib %12d (%-20s): %3d headings changed\n",
# Outside test mode, save the record with autolinking disabled and with the
# holds-queue update and per-record indexing skipped. The biblionumber is
# queued in @updated_biblios so indexing happens in batches instead.
235 if ( not $test_only ) {
236 ModBiblio( $record, $biblionumber, $frameworkcode, {
237 disable_autolink => 1,
238 skip_holds_queue => 1,
239 skip_record_index =>1
241 push @updated_biblios, $biblionumber;
242 #Last param is to note ModBiblio was called from linking script and bib should not be linked again
243 $num_bibs_modified++;
# Index the biblios updated since the previous batch, clear the pending list,
# and print a progress line with the number of records processed so far.
# NOTE(review): the lines that unpack $recs and (presumably) commit the
# database transaction are not visible in this excerpt.
248 sub print_progress_and_commit {
251 $indexer->index_records( \@updated_biblios, "specialUpdate", "biblioserver" );
252 @updated_biblios = ();
253 print "... processed $recs records\n";
258 link_bibs_to_authorities.pl
262 link_bibs_to_authorities.pl
263 link_bibs_to_authorities.pl -v
264 link_bibs_to_authorities.pl -l
265 link_bibs_to_authorities.pl --commit=1000
266 link_bibs_to_authorities.pl --auth-limit=STRING
267 link_bibs_to_authorities.pl --bib-limit=STRING
268 link_bibs_to_authorities.pl -g=700
272 This batch job checks each bib record in the Koha database and attempts to link
273 each of its headings to the matching authority record.
281 =item B<-v|--verbose>
283 Provide verbose log information (print the number of headings changed for each
286 =item B<-l|--link-report>
288 Provide a report of all the headings that were processed: which were matched,
291 =item B<--auth-limit=S>
293 Only process those headings which match an authority record that matches the
294 user-specified WHERE clause.
296 =item B<--bib-limit=S>
298 Only process those bib records that match the user-specified WHERE clause.
302 Commit the results to the database after every N records are processed.
306 Only process those headings found in MARC field N.
310 Only test the authority linking and report the results; do not change the bib