new batch job to stage a file of MARC biblios for import.
As part of this, modified two routines in C4::ImportBatch to support a callback for monitoring the progress of import processing. Signed-off-by: Chris Cormack <crc@liblime.com> Signed-off-by: Joshua Ferraro <jmf@liblime.com>
This commit is contained in:
parent
979282933f
commit
3fbd25602b
2 changed files with 175 additions and 4 deletions
|
@ -231,15 +231,33 @@ sub ModBiblioInBatch {
|
|||
($batch_id, $num_records, $num_items, @invalid_records) =
|
||||
BatchStageMarcRecords($marc_flavor, $marc_records, $file_name,
|
||||
$comments, $branch_code, $parse_items,
|
||||
$leave_as_staging);
|
||||
$leave_as_staging,
|
||||
$progress_interval, $progress_callback);
|
||||
|
||||
=back
|
||||
|
||||
=cut
|
||||
|
||||
sub BatchStageMarcRecords {
|
||||
my ($marc_flavor, $marc_records, $file_name, $comments, $branch_code, $parse_items, $leave_as_staging) = @_;
|
||||
|
||||
my $marc_flavor = shift;
|
||||
my $marc_records = shift;
|
||||
my $file_name = shift;
|
||||
my $comments = shift;
|
||||
my $branch_code = shift;
|
||||
my $parse_items = shift;
|
||||
my $leave_as_staging = shift;
|
||||
|
||||
# optional callback to monitor status
|
||||
# of job
|
||||
my $progress_interval = 0;
|
||||
my $progress_callback = undef;
|
||||
if ($#_ == 1) {
|
||||
$progress_interval = shift;
|
||||
$progress_callback = shift;
|
||||
$progress_interval = 0 unless $progress_interval =~ /^\d+$/ and $progress_interval > 0;
|
||||
$progress_interval = 0 unless 'CODE' eq ref $progress_callback;
|
||||
}
|
||||
|
||||
my $batch_id = AddImportBatch('create_new', 'staging', 'batch', $file_name, $comments);
|
||||
my @invalid_records = ();
|
||||
my $num_valid = 0;
|
||||
|
@ -248,6 +266,9 @@ sub BatchStageMarcRecords {
|
|||
my $rec_num = 0;
|
||||
foreach my $marc_blob (split(/\x1D/, $marc_records)) {
|
||||
$rec_num++;
|
||||
if ($progress_interval and (0 == ($rec_num % $progress_interval))) {
|
||||
&$progress_callback($rec_num);
|
||||
}
|
||||
my $marc_record = FixEncoding($marc_blob, "\x1D");
|
||||
my $import_record_id;
|
||||
if (scalar($marc_record->fields()) == 0) {
|
||||
|
@ -314,7 +335,7 @@ sub AddItemsToImportBiblio {
|
|||
|
||||
=over 4
|
||||
|
||||
my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches);
|
||||
my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches, $progress_interval, $progress_callback);
|
||||
|
||||
=back
|
||||
|
||||
|
@ -326,6 +347,12 @@ of each record to "no_match" or "auto_match" as appropriate.
|
|||
The $max_matches parameter is optional; if it is not supplied,
|
||||
it defaults to 10.
|
||||
|
||||
The $progress_interval and $progress_callback parameters are
|
||||
optional; if both are supplied, the sub referred to by
|
||||
$progress_callback will be invoked every $progress_interval
|
||||
records using the number of records processed as the
|
||||
sole argument.
|
||||
|
||||
=cut
|
||||
|
||||
sub BatchFindBibDuplicates {
|
||||
|
@ -333,6 +360,17 @@ sub BatchFindBibDuplicates {
|
|||
my $matcher = shift;
|
||||
my $max_matches = @_ ? shift : 10;
|
||||
|
||||
# optional callback to monitor status
|
||||
# of job
|
||||
my $progress_interval = 0;
|
||||
my $progress_callback = undef;
|
||||
if ($#_ == 1) {
|
||||
$progress_interval = shift;
|
||||
$progress_callback = shift;
|
||||
$progress_interval = 0 unless $progress_interval =~ /^\d+$/ and $progress_interval > 0;
|
||||
$progress_interval = 0 unless 'CODE' eq ref $progress_callback;
|
||||
}
|
||||
|
||||
my $dbh = C4::Context->dbh;
|
||||
my $old_overlay_action = GetImportBatchOverlayAction($batch_id);
|
||||
if ($old_overlay_action eq "create_new") {
|
||||
|
@ -345,7 +383,12 @@ sub BatchFindBibDuplicates {
|
|||
WHERE import_batch_id = ?");
|
||||
$sth->execute($batch_id);
|
||||
my $num_with_matches = 0;
|
||||
my $rec_num = 0;
|
||||
while (my $rowref = $sth->fetchrow_hashref) {
|
||||
$rec_num++;
|
||||
if ($progress_interval and (0 == ($rec_num % $progress_interval))) {
|
||||
&$progress_callback($rec_num);
|
||||
}
|
||||
my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
|
||||
my @matches = $matcher->get_matches($marc_record, $max_matches);
|
||||
if (scalar(@matches) > 0) {
|
||||
|
|
128
misc/stage_biblios_file.pl
Executable file
128
misc/stage_biblios_file.pl
Executable file
|
@ -0,0 +1,128 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
|
||||
use C4::Context;
|
||||
use C4::ImportBatch;
|
||||
use C4::Matcher;
|
||||
use Getopt::Long;
|
||||
|
||||
# Flush STDOUT after every write so progress messages show up while the
# batch is still running.
$| = 1;

# command-line parameters
my $match_bibs    = 0;
my $add_items     = 0;
my $input_file    = "";
my $batch_comment = "";
my $want_help     = 0;

my $result = GetOptions(
    'file:s'     => \$input_file,
    'match-bibs' => \$match_bibs,
    'add-items'  => \$add_items,
    'comment:s'  => \$batch_comment,
    'h|help'     => \$want_help
);

# Show the usage text when help was requested, when option parsing failed,
# or when no input file was named.  Only a clean --help request counts as
# success; the other two cases are usage errors and exit non-zero so that
# calling scripts and cron jobs can detect them.
if (not $result or $input_file eq "" or $want_help) {
    print_usage();
    exit(($result and $want_help) ? 0 : 1);
}

# Fail early with the system error if the file is missing or unreadable.
unless (-r $input_file) {
    die "$0: cannot open input file $input_file: $!\n";
}

process_batch($input_file, $match_bibs, $add_items, $batch_comment);

exit 0;
|
||||
|
||||
# Stage one file of MARC bibliographic records and print a summary report.
#
# Parameters:
#   $input_file    - path of the MARC file to read
#   $match_bibs    - true to run BatchFindBibDuplicates on the new batch
#   $add_items     - passed to BatchStageMarcRecords as its item-parsing flag
#   $batch_comment - comment passed to BatchStageMarcRecords for the batch
#
# Dies if the input file cannot be opened.  Progress is reported through
# print_progress every 100 records during staging and matching.
sub process_batch {
    my ($input_file, $match_bibs, $add_items, $batch_comment) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode, and the handle cannot collide with a
    # package-global bareword.
    open my $in_fh, '<', $input_file
        or die "$0: cannot open input file $input_file: $!\n";

    my $marc_records      = "";
    my $num_input_records = 0;
    {
        # Read one MARC record (terminated by 0x1D) per iteration.  The
        # separator is localized so that it is restored before any library
        # code runs; a lexical loop variable keeps the global $_ untouched.
        local $/ = "\035";
        while (my $record = <$in_fh>) {
            $marc_records .= $record;    # FIXME - this sort of string concatenation
                                         # is probably rather inefficient
            $num_input_records++;
        }
    }
    close $in_fh;

    my $marc_flavor = C4::Context->preference('marcflavour');

    print "... staging MARC records -- please wait\n";
    my ($batch_id, $num_valid, $num_items, @import_errors) =
        BatchStageMarcRecords($marc_flavor, $marc_records, $input_file, $batch_comment, '', $add_items, 0,
                              100, \&print_progress);
    print "... finished staging MARC records\n";

    my $num_with_matches = 0;
    if ($match_bibs) {
        # Match incoming bibs using a matcher with a single matchpoint on 020$a.
        my $matcher = C4::Matcher->new('biblio');
        $matcher->add_matchpoint("020", "a", '', 'isbn', 1000);
        print "... looking for matches with records already in database\n";
        $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, 10, 100, \&print_progress);
        print "... finished looking for matches\n";
    }

    # Every entry in @import_errors stands for one record that failed staging.
    my $num_invalid_bibs = scalar(@import_errors);
    print <<_SUMMARY_;

MARC record staging report
------------------------------------
Input file: $input_file
Number of input bibs: $num_input_records
Number of valid bibs: $num_valid
Number of invalid bibs: $num_invalid_bibs
_SUMMARY_
    if ($match_bibs) {
        print "Number of bibs matched: $num_with_matches\n";
    } else {
        print "Incoming bibs not matched against existing bibs (--match-bibs option not supplied)\n";
    }
    if ($add_items) {
        print "Number of items parsed: $num_items\n";
    } else {
        print "No items parsed (--add-items option not supplied)\n";
    }

    print "\n";
    print "Batch number assigned: $batch_id\n";
    print "\n";
}
|
||||
|
||||
# Progress callback handed to the C4::ImportBatch routines: prints one
# status line giving the number of records processed so far.
sub print_progress {
    my ($processed) = @_;
    my $message = "... processed $processed records\n";
    print $message;
}
|
||||
|
||||
# Print the command-line help text for this script to STDOUT.
# The script name ($0) is interpolated into the first line.
sub print_usage {
    my $usage_text = <<"_USAGE_";
$0: stage MARC bib file into reservoir.

Use this batch job to load a file of MARC bibliographic records
(with optional item information) into the Koha reservoir.

After running this program to stage your file, you can use
either the batch job commit_biblios_file.pl or the Koha
Tools option "Manage Staged MARC Records" to load the
records into the main Koha database.

Parameters:
--file <file_name> name of input MARC bib file
--match-bibs use this option to match bibs
in the file with bibs already in
the database for future overlay.
--add-items use this option to specify that
item data is embedded in the MARC
bibs and should be parsed.
--comment <comment> optional comment to describe
the record batch; if the comment
has spaces in it, surround the
comment with quotation marks.
--help or -h show this message.
_USAGE_
    print $usage_text;
}
|
Loading…
Reference in a new issue