Browse Source

more work on batch import

* Completely removed old marc_breeding table
* Started updated Tools import function to stage records

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>
3.0.x
Galen Charlton 17 years ago
committed by Joshua Ferraro
parent
commit
2e07983367
  1. 8
      C4/Breeding.pm
  2. 339
      C4/ImportBatch.pm
  3. 4
      C4/Z3950.pm
  4. 20
      installer/kohastructure.sql
  5. 4
      koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl
  6. 26
      tools/import.pl
  7. 28
      updater/updatedatabase

8
C4/Breeding.pm

@ -31,7 +31,8 @@ $VERSION = 0.01;
=head1 NAME
C4::Breeding : script to add a biblio in marc_breeding table.
C4::Breeding : module to add biblios to import_records via
the breeding/reservoir API.
=head1 SYNOPSIS
@ -50,7 +51,7 @@ C4::Breeding : script to add a biblio in marc_breeding table.
=head1 DESCRIPTION
ImportBreeding import MARC records in the reservoir (marc_breeding table).
ImportBreeding imports MARC records into the reservoir (import_records/import_batches tables).
The records may or may not be properly encoded; we try to re-encode them in UTF-8 if needed.
Works correctly with the BNF server, which sends UNIMARC Latin-1 records, and should work with other servers too.
the FixEncoding sub is in Koha.pm, as it's a general usage sub.
@ -161,7 +162,8 @@ C<$isbn> contains isbn or issn,
C<$random> contains the random seed from a z3950 search.
C<$count> is the number of items in C<@results>. C<@results> is an
array of references-to-hash; the keys are the items from the C<marc_breeding> table of the Koha database.
array of references-to-hash; the keys are the items from the C<import_records> and
C<import_biblios> tables of the Koha database.
=cut

339
C4/ImportBatch.pm

@ -21,6 +21,7 @@ use strict;
use C4::Context;
use C4::Koha;
use C4::Biblio;
use C4::Matcher;
require Exporter;
@ -52,6 +53,18 @@ use C4::ImportBatch;
AddImportBatch
AddBiblioToBatch
ModBiblioInBatch
BatchStageMarcRecords
BatchFindBibDuplicates
BatchCommitBibRecords
GetImportBatchStatus
SetImportBatchStatus
GetImportBatchOverlayAction
SetImportBatchOverlayAction
GetImportRecordOverlayStatus
SetImportRecordOverlayStatus
SetImportRecordMatches
);
=head2 GetZ3950BatchId
@ -140,12 +153,14 @@ sub AddImportBatch {
my $import_record_id = AddBiblioToBatch($batch_id, $record_sequence, $marc_record, $encoding, $z3950random);
=back
=cut
sub AddBiblioToBatch {
my ($batch_id, $record_sequence, $marc_record, $encoding, $z3950random) = @_;
my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'bib', $encoding, $z3950random);
my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'biblio', $encoding, $z3950random);
_add_biblio_fields($import_record_id, $marc_record);
return $import_record_id;
}
@ -156,6 +171,8 @@ sub AddBiblioToBatch {
ModBiblioInBatch($import_record_id, $marc_record);
=back
=cut
sub ModBiblioInBatch {
@ -166,6 +183,326 @@ sub ModBiblioInBatch {
}
=head2 BatchStageMarcRecords

=over 4

($batch_id, $num_records, @invalid_records) = BatchStageMarcRecords($marc_flavor, $marc_records, $file_name,
    $comments, $branch_code, $leave_as_staging);

=back

Splits a blob of raw MARC records on the record terminator (0x1D),
re-encodes each record, and stages the parseable ones in a newly
created import batch.  Returns the new batch ID, the count of valid
records, and the list of raw blobs that yielded no MARC fields.

=cut

sub BatchStageMarcRecords {
    my ($marc_flavor, $marc_records, $file_name, $comments, $branch_code, $leave_as_staging) = @_;

    my $batch_id = AddImportBatch('create_new', 'staging', 'batch', $file_name, $comments);

    # FIXME - for now, we're dealing only with bibs
    my @rejected_blobs;
    my $valid_count = 0;
    my $sequence = 0;
    foreach my $raw_blob (split(/\x1D/, $marc_records)) {
        $sequence++;
        my $record = FixEncoding($raw_blob, "\x1D");
        if (scalar($record->fields()) == 0) {
            # no fields could be extracted -- not a usable MARC record
            push @rejected_blobs, $raw_blob;
            next;
        }
        $valid_count++;
        AddBiblioToBatch($batch_id, $sequence, $record, $marc_flavor, int(rand(99999)));
    }
    SetImportBatchStatus($batch_id, 'staged') unless $leave_as_staging;
    # FIXME batch_code, number of bibs, number of items
    return ($batch_id, $valid_count, @rejected_blobs);
}
=head2 BatchFindBibDuplicates

=over 4

my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches);

=back

Goes through the records loaded in the batch and attempts to
find duplicates for each one.  Sets the overlay action to
'replace' if it was 'create_new', and sets the overlay status
of each record to 'no_match' or 'auto_match' as appropriate.

The $max_matches parameter is optional; if it is not supplied,
it defaults to 10.

=cut

sub BatchFindBibDuplicates {
    my $batch_id = shift;
    my $matcher = shift;
    my $max_matches = @_ ? shift : 10;

    my $dbh = C4::Context->dbh;
    # a batch set to unconditionally create records is switched to
    # 'replace' so that any matches found here can be overlaid
    my $old_overlay_action = GetImportBatchOverlayAction($batch_id);
    if ($old_overlay_action eq "create_new") {
        SetImportBatchOverlayAction($batch_id, 'replace');
    }
    my $sth = $dbh->prepare("SELECT import_record_id, marc
                             FROM import_records
                             JOIN import_biblios USING (import_record_id)
                             WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    my $num_with_matches = 0;
    while (my $rowref = $sth->fetchrow_hashref) {
        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
        my @matches = $matcher->get_matches($marc_record, $max_matches);
        if (scalar(@matches) > 0) {
            $num_with_matches++;
            SetImportRecordMatches($rowref->{'import_record_id'}, @matches);
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'auto_match');
        } else {
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'no_match');
        }
    }
    $sth->finish();
    return $num_with_matches;
}
=head2 BatchCommitBibRecords

=over 4

my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);

=back

Commits (i.e., saves to the catalog) all records in the batch.
Records whose overlay action resolves to creating a new bib are
added via AddBiblio(); matched records are overlaid via ModBiblio(),
with the previous version of the target bib preserved in
import_records.marcxml_old.  Records already imported or flagged
as errors are counted as ignored and skipped.

=cut

sub BatchCommitBibRecords {
    my $batch_id = shift;

    my $num_added = 0;
    my $num_updated = 0;
    my $num_ignored = 0;
    # commit (i.e., save) all records in the batch
    # FIXME biblio only at the moment
    # was SetImportBatchStatus('importing') -- the batch ID was missing,
    # so the UPDATE bound 'importing' as the batch ID and affected no rows
    SetImportBatchStatus($batch_id, 'importing');
    my $overlay_action = GetImportBatchOverlayAction($batch_id);
    my $dbh = C4::Context->dbh;
    my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marc
                             FROM import_records
                             JOIN import_biblios USING (import_record_id)
                             WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    while (my $rowref = $sth->fetchrow_hashref) {
        if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'imported') {
            $num_ignored++;
            next; # was falling through and committing the record again
        }
        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
        if ($overlay_action eq 'create_new' or
            ($overlay_action eq 'replace' and $rowref->{'overlay_status'} eq 'no_match')) {
            $num_added++;
            my ($biblionumber, $biblioitemnumber) = AddBiblio($marc_record, '');
        } else {
            $num_updated++;
            my $biblionumber = GetBestRecordMatch($rowref->{'import_record_id'});
            my ($count, $oldbiblio) = GetBiblio($biblionumber);
            my $oldxml = GetXmlBiblio($biblionumber);
            ModBiblio($marc_record, $biblionumber, $oldbiblio->{'frameworkcode'});
            # preserve the pre-overlay version of the target bib so the
            # change can be inspected or reverted later
            my $upd_sth = $dbh->prepare("UPDATE import_records SET marcxml_old = ? WHERE import_record_id = ?");
            $upd_sth->execute($oldxml, $rowref->{'import_record_id'});
            $upd_sth->finish();
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied');
        }
    }
    $sth->finish();
    # was SetImportBatchStatus('imported') -- same missing-batch-ID bug
    SetImportBatchStatus($batch_id, 'imported');
    return ($num_added, $num_updated, $num_ignored);
}
=head2 GetBestRecordMatch

=over 4

my $record_id = GetBestRecordMatch($import_record_id);

=back

Returns the candidate match with the highest score for the given
staged record (ties broken by the highest candidate ID), or undef
if no match candidates have been recorded.

=cut

sub GetBestRecordMatch {
    my ($import_record_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($best_candidate) = $dbh->selectrow_array(
        "SELECT candidate_match_id
         FROM import_record_matches
         WHERE import_record_id = ?
         ORDER BY score DESC, candidate_match_id DESC",
        undef, $import_record_id);
    return $best_candidate;
}
=head2 GetImportBatchStatus

=over 4

my $status = GetImportBatchStatus($batch_id);

=back

Returns the import_status of the given batch, or undef if the
batch does not exist.

=cut

sub GetImportBatchStatus {
    my ($batch_id) = @_;

    my $dbh = C4::Context->dbh;
    # column was 'batch_id', which does not match the 'import_batch_id'
    # column used by every other query in this module
    my $sth = $dbh->prepare("SELECT import_status FROM import_batches WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    my ($status) = $sth->fetchrow_array();
    $sth->finish();
    # was a bare 'return;', which discarded the fetched status
    return $status;
}
=head2 SetImportBatchStatus

=over 4

SetImportBatchStatus($batch_id, $new_status);

=back

Sets the import_status of the given batch.

=cut

sub SetImportBatchStatus {
    my ($batch_id, $new_status) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_batches SET import_status = ? WHERE import_batch_id = ?",
             undef, $new_status, $batch_id);
}
=head2 GetImportBatchOverlayAction

=over 4

my $overlay_action = GetImportBatchOverlayAction($batch_id);

=back

Returns the overlay_action of the given batch, or undef if the
batch does not exist.

=cut

sub GetImportBatchOverlayAction {
    my ($batch_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($action) = $dbh->selectrow_array(
        "SELECT overlay_action FROM import_batches WHERE import_batch_id = ?",
        undef, $batch_id);
    return $action;
}
=head2 SetImportBatchOverlayAction

=over 4

SetImportBatchOverlayAction($batch_id, $new_overlay_action);

=back

Sets the overlay_action of the given batch.

=cut

sub SetImportBatchOverlayAction {
    my ($batch_id, $new_overlay_action) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_batches SET overlay_action = ? WHERE import_batch_id = ?",
             undef, $new_overlay_action, $batch_id);
}
=head2 GetImportRecordOverlayStatus

=over 4

my $overlay_status = GetImportRecordOverlayStatus($import_record_id);

=back

Returns the overlay_status of the given staged record, or undef
if the record does not exist.

=cut

sub GetImportRecordOverlayStatus {
    my ($import_record_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($status) = $dbh->selectrow_array(
        "SELECT overlay_status FROM import_records WHERE import_record_id = ?",
        undef, $import_record_id);
    return $status;
}
=head2 SetImportRecordOverlayStatus

=over 4

SetImportRecordOverlayStatus($import_record_id, $new_overlay_status);

=back

Sets the overlay_status of the given staged record.

=cut

sub SetImportRecordOverlayStatus {
    my ($import_record_id, $new_overlay_status) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_records SET overlay_status = ? WHERE import_record_id = ?",
             undef, $new_overlay_status, $import_record_id);
}
=head2 SetImportRecordMatches

=over 4

SetImportRecordMatches($import_record_id, @matches);

=back

Replaces any previously recorded match candidates for the given
staged record with the supplied list.  Each element of @matches is
a hashref with 'record_id' and 'score' keys.

=cut

sub SetImportRecordMatches {
    my ($import_record_id, @matches) = @_;

    my $dbh = C4::Context->dbh;
    # start from a clean slate: drop any previously recorded candidates
    $dbh->do("DELETE FROM import_record_matches WHERE import_record_id = ?",
             undef, $import_record_id);
    my $insert_sth = $dbh->prepare("INSERT INTO import_record_matches (import_record_id, candidate_match_id, score)
                                    VALUES (?, ?, ?)");
    foreach my $candidate (@matches) {
        $insert_sth->execute($import_record_id, $candidate->{'record_id'}, $candidate->{'score'});
    }
}
# internal functions
sub _create_import_record {

4
C4/Z3950.pm

@ -319,13 +319,13 @@ Koha Developement team <info@koha.org>
# * a "search z3950" button is added in the addbiblio template.
# * when clicked, a popup appears and z3950/search.pl is called
# * z3950/search.pl calls addz3950search in the DB
# * the z3950 daemon retrieve the records and stores them in z3950results AND in marc_breeding table.
# * the z3950 daemon retrieve the records and stores them in import_batches/import_records/import_biblios tables.
# * as long as there are searches pending, the popup auto-refreshes every 2 seconds and reports how many searches are pending.
# * when the user clicks on a z3950 result => the parent popup is called with the requested biblio, and auto-filled
#
# Note :
# * character encoding support : (It's a nightmare...) In the z3950servers table, a "encoding" column has been added. You can put "UNIMARC" or "USMARC" in this column. Depending on this, the char_decode in C4::Biblio.pm replaces marc-char-encode by an iso 8859-1 encoding. Note that in the breeding import this value has been added too, for a better support.
# * the marc_breeding and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
# * the import_records and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
#
# Revision 1.8 2003/04/29 08:09:45 tipaul
# z3950 support is coming...

20
installer/kohastructure.sql

@ -1135,26 +1135,6 @@ CREATE TABLE `letter` (
PRIMARY KEY (`module`,`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `marc_breeding`
--
DROP TABLE IF EXISTS `marc_breeding`;
CREATE TABLE `marc_breeding` (
`id` bigint(20) NOT NULL auto_increment,
`file` varchar(80) NOT NULL default '',
`isbn` varchar(10) NOT NULL default '',
`title` varchar(128) default NULL,
`author` varchar(80) default NULL,
`marc` longblob,
`encoding` varchar(40) NOT NULL default '',
`z3950random` varchar(40) default NULL,
PRIMARY KEY (`id`),
KEY `title` (`title`),
KEY `isbn` (`isbn`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `marc_subfield_structure`
--

4
koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl

@ -43,9 +43,9 @@
</li>
<li>
<label for="filename">Name of this import: </label>
<label for="comments">Notes about this import: </label>
<input type="text" id="filename" name="filename" />
<input type="text" id="comments" name="comments" />
</li>
<li>

26
tools/import.pl

@ -37,6 +37,8 @@ use C4::Input;
use C4::Output;
use C4::Biblio;
use C4::Breeding;
use C4::ImportBatch;
use C4::Matcher;
#------------------
# Constants
@ -59,7 +61,7 @@ my $dbh = C4::Context->dbh;
my $uploadmarc=$input->param('uploadmarc');
my $overwrite_biblio = $input->param('overwrite_biblio');
my $filename = $input->param('filename');
my $comments = $input->param('comments');
my $syntax = $input->param('syntax');
my ($template, $loggedinuser, $cookie)
= get_template_and_user({template_name => "tools/import.tmpl",
@ -72,18 +74,26 @@ my ($template, $loggedinuser, $cookie)
$template->param(SCRIPT_NAME => $ENV{'SCRIPT_NAME'},
uploadmarc => $uploadmarc);
my $filename = $uploadmarc;
if ($uploadmarc && length($uploadmarc)>0) {
my $marcrecord='';
while (<$uploadmarc>) {
$marcrecord.=$_;
}
my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch');
$template->param(imported => $imported,
alreadyindb => $alreadyindb,
alreadyinfarm => $alreadyinfarm,
notmarcrecord => $notmarcrecord,
total => $imported+$alreadyindb+$alreadyinfarm+$notmarcrecord,
#my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch');
# FIXME branch code
my ($batch_id, $num_valid, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, $comments, '', 1);
my $matcher = C4::Matcher->new('biblio');
$matcher->add_matchpoint("020", "a", '', 'isbn', 1000);
my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher);
my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);
$template->param(imported => $num_valid,
alreadyindb => $num_with_matches,
alreadyinfarm => 0,
notmarcrecord => scalar(@import_errors),
total => $num_valid + scalar(@import_errors)
);
}

28
updater/updatedatabase

@ -423,6 +423,34 @@ if (C4::Context->preference("Version") < TransformToNum($DBversion)) {
KEY `itemnumber` (`itemnumber`),
KEY `branchcode` (`branchcode`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8");
$dbh->do("INSERT INTO `import_batches`
(`overlay_action`, `import_status`, `batch_type`, `file_name`)
SELECT distinct 'create_new', 'staged', 'z3950', `file`
FROM `marc_breeding`");
$dbh->do("INSERT INTO `import_records`
(`import_batch_id`, `record_sequence`, `marc`, `record_type`, `status`,
`encoding`, `z3950random`, `marcxml`, `marcxml_old`)
SELECT `import_batch_id`, 1, `marc`, 'biblio', 'staged', `encoding`, `z3950random`, '', ''
FROM `marc_breeding`
JOIN `import_batches` ON (`file_name` = `file`)");
$dbh->do("INSERT INTO `import_biblios`
(`import_record_id`, `title`, `author`, `isbn`)
SELECT `import_record_id`, `title`, `author`, `isbn`
FROM `marc_breeding`
JOIN `import_records` USING (`z3950random`)");
$dbh->do("UPDATE `import_batches`
SET `num_biblios` = (
SELECT COUNT(*)
FROM `import_records`
WHERE `import_batch_id` = `import_batches`.`import_batch_id`
)");
$dbh->do("DROP TABLE `marc_breeding`");
SetVersion ($DBversion);
}

Loading…
Cancel
Save