Browse Source

more work on batch import

* Completely removed old marc_breeding table
* Started updated Tools import function to stage records

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>
3.0.x
Galen Charlton 17 years ago
committed by Joshua Ferraro
parent
commit
2e07983367
  1. 8
      C4/Breeding.pm
  2. 339
      C4/ImportBatch.pm
  3. 4
      C4/Z3950.pm
  4. 20
      installer/kohastructure.sql
  5. 4
      koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl
  6. 26
      tools/import.pl
  7. 28
      updater/updatedatabase

8
C4/Breeding.pm

@ -31,7 +31,8 @@ $VERSION = 0.01;
=head1 NAME
C4::Breeding : script to add a biblio in marc_breeding table.
C4::Breeding : module to add biblios to import_records via
the breeding/reservoir API.
=head1 SYNOPSIS
@ -50,7 +51,7 @@ C4::Breeding : script to add a biblio in marc_breeding table.
=head1 DESCRIPTION
ImportBreeding import MARC records in the reservoir (marc_breeding table).
ImportBreeding imports MARC records into the reservoir (import_records/import_batches tables).
The records may or may not be properly encoded; we try to re-encode them in UTF-8 if needed.
Works correctly with the BNF server, which sends UNIMARC Latin-1 records, and should work with other servers too.
the FixEncoding sub is in Koha.pm, as it's a general usage sub.
@ -161,7 +162,8 @@ C<$isbn> contains isbn or issn,
C<$random> contains the random seed from a z3950 search.
C<$count> is the number of items in C<@results>. C<@results> is an
array of references-to-hash; the keys are the items from the C<marc_breeding> table of the Koha database.
array of references-to-hash; the keys are the items from the C<import_records> and
C<import_biblios> tables of the Koha database.
=cut

339
C4/ImportBatch.pm

@ -21,6 +21,7 @@ use strict;
use C4::Context;
use C4::Koha;
use C4::Biblio;
use C4::Matcher;
require Exporter;
@ -52,6 +53,18 @@ use C4::ImportBatch;
AddImportBatch
AddBiblioToBatch
ModBiblioInBatch
BatchStageMarcRecords
BatchFindBibDuplicates
BatchCommitBibRecords
GetImportBatchStatus
SetImportBatchStatus
GetImportBatchOverlayAction
SetImportBatchOverlayAction
GetImportRecordOverlayStatus
SetImportRecordOverlayStatus
SetImportRecordMatches
);
=head2 GetZ3950BatchId
@ -140,12 +153,14 @@ sub AddImportBatch {
my $import_record_id = AddBiblioToBatch($batch_id, $record_sequence, $marc_record, $encoding, $z3950random);
=back
=cut
sub AddBiblioToBatch {
my ($batch_id, $record_sequence, $marc_record, $encoding, $z3950random) = @_;
my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'bib', $encoding, $z3950random);
my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'biblio', $encoding, $z3950random);
_add_biblio_fields($import_record_id, $marc_record);
return $import_record_id;
}
@ -156,6 +171,8 @@ sub AddBiblioToBatch {
ModBiblioInBatch($import_record_id, $marc_record);
=back
=cut
sub ModBiblioInBatch {
@ -166,6 +183,326 @@ sub ModBiblioInBatch {
}
=head2 BatchStageMarcRecords

=over 4

($batch_id, $num_records, @invalid_records) = BatchStageMarcRecords($marc_flavor, $marc_records, $file_name,
    $comments, $branch_code, $leave_as_staging);

=back

Splits a blob of raw MARC records on the record terminator (0x1D),
re-encodes each record, and stages the parseable ones in a newly
created import batch.  Returns the new batch ID, the count of valid
records, and the list of raw blobs that yielded no MARC fields.

=cut

sub BatchStageMarcRecords {
    my ($marc_flavor, $marc_records, $file_name, $comments, $branch_code, $leave_as_staging) = @_;

    my $batch_id = AddImportBatch('create_new', 'staging', 'batch', $file_name, $comments);

    # FIXME - for now, we're dealing only with bibs
    my @rejected_blobs;
    my $valid_count = 0;
    my $sequence = 0;
    foreach my $raw_blob (split(/\x1D/, $marc_records)) {
        $sequence++;
        my $record = FixEncoding($raw_blob, "\x1D");
        if (scalar($record->fields()) == 0) {
            # no fields could be extracted -- not a usable MARC record
            push @rejected_blobs, $raw_blob;
            next;
        }
        $valid_count++;
        AddBiblioToBatch($batch_id, $sequence, $record, $marc_flavor, int(rand(99999)));
    }
    SetImportBatchStatus($batch_id, 'staged') unless $leave_as_staging;
    # FIXME batch_code, number of bibs, number of items
    return ($batch_id, $valid_count, @rejected_blobs);
}
=head2 BatchFindBibDuplicates

=over 4

my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches);

=back

Goes through the records loaded in the batch and attempts to
find duplicates for each one.  Sets the overlay action to
'replace' if it was 'create_new', and sets the overlay status
of each record to 'no_match' or 'auto_match' as appropriate.

The $max_matches parameter is optional; if it is not supplied,
it defaults to 10.

=cut

sub BatchFindBibDuplicates {
    my $batch_id = shift;
    my $matcher = shift;
    my $max_matches = @_ ? shift : 10;

    my $dbh = C4::Context->dbh;
    # a batch set to unconditionally create records is switched to
    # 'replace' so that any matches found here can be overlaid
    my $old_overlay_action = GetImportBatchOverlayAction($batch_id);
    if ($old_overlay_action eq "create_new") {
        SetImportBatchOverlayAction($batch_id, 'replace');
    }
    my $sth = $dbh->prepare("SELECT import_record_id, marc
                             FROM import_records
                             JOIN import_biblios USING (import_record_id)
                             WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    my $num_with_matches = 0;
    while (my $rowref = $sth->fetchrow_hashref) {
        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
        my @matches = $matcher->get_matches($marc_record, $max_matches);
        if (scalar(@matches) > 0) {
            $num_with_matches++;
            SetImportRecordMatches($rowref->{'import_record_id'}, @matches);
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'auto_match');
        } else {
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'no_match');
        }
    }
    $sth->finish();
    return $num_with_matches;
}
=head2 BatchCommitBibRecords

=over 4

my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);

=back

Commits (i.e., saves to the catalog) all records in the batch.
Records whose overlay action resolves to creating a new bib are
added via AddBiblio(); matched records are overlaid via ModBiblio(),
with the previous version of the target bib preserved in
import_records.marcxml_old.  Records already imported or flagged
as errors are counted as ignored and skipped.

=cut

sub BatchCommitBibRecords {
    my $batch_id = shift;

    my $num_added = 0;
    my $num_updated = 0;
    my $num_ignored = 0;
    # commit (i.e., save) all records in the batch
    # FIXME biblio only at the moment
    # was SetImportBatchStatus('importing') -- the batch ID was missing,
    # so the UPDATE bound 'importing' as the batch ID and affected no rows
    SetImportBatchStatus($batch_id, 'importing');
    my $overlay_action = GetImportBatchOverlayAction($batch_id);
    my $dbh = C4::Context->dbh;
    my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marc
                             FROM import_records
                             JOIN import_biblios USING (import_record_id)
                             WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    while (my $rowref = $sth->fetchrow_hashref) {
        if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'imported') {
            $num_ignored++;
            next; # was falling through and committing the record again
        }
        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
        if ($overlay_action eq 'create_new' or
            ($overlay_action eq 'replace' and $rowref->{'overlay_status'} eq 'no_match')) {
            $num_added++;
            my ($biblionumber, $biblioitemnumber) = AddBiblio($marc_record, '');
        } else {
            $num_updated++;
            my $biblionumber = GetBestRecordMatch($rowref->{'import_record_id'});
            my ($count, $oldbiblio) = GetBiblio($biblionumber);
            my $oldxml = GetXmlBiblio($biblionumber);
            ModBiblio($marc_record, $biblionumber, $oldbiblio->{'frameworkcode'});
            # preserve the pre-overlay version of the target bib so the
            # change can be inspected or reverted later
            my $upd_sth = $dbh->prepare("UPDATE import_records SET marcxml_old = ? WHERE import_record_id = ?");
            $upd_sth->execute($oldxml, $rowref->{'import_record_id'});
            $upd_sth->finish();
            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied');
        }
    }
    $sth->finish();
    # was SetImportBatchStatus('imported') -- same missing-batch-ID bug
    SetImportBatchStatus($batch_id, 'imported');
    return ($num_added, $num_updated, $num_ignored);
}
=head2 GetBestRecordMatch

=over 4

my $record_id = GetBestRecordMatch($import_record_id);

=back

Returns the candidate match with the highest score for the given
staged record (ties broken by the highest candidate ID), or undef
if no match candidates have been recorded.

=cut

sub GetBestRecordMatch {
    my ($import_record_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($best_candidate) = $dbh->selectrow_array(
        "SELECT candidate_match_id
         FROM import_record_matches
         WHERE import_record_id = ?
         ORDER BY score DESC, candidate_match_id DESC",
        undef, $import_record_id);
    return $best_candidate;
}
=head2 GetImportBatchStatus

=over 4

my $status = GetImportBatchStatus($batch_id);

=back

Returns the import_status of the given batch, or undef if the
batch does not exist.

=cut

sub GetImportBatchStatus {
    my ($batch_id) = @_;

    my $dbh = C4::Context->dbh;
    # column was 'batch_id', which does not match the 'import_batch_id'
    # column used by every other query in this module
    my $sth = $dbh->prepare("SELECT import_status FROM import_batches WHERE import_batch_id = ?");
    $sth->execute($batch_id);
    my ($status) = $sth->fetchrow_array();
    $sth->finish();
    # was a bare 'return;', which discarded the fetched status
    return $status;
}
=head2 SetImportBatchStatus

=over 4

SetImportBatchStatus($batch_id, $new_status);

=back

Sets the import_status of the given batch.

=cut

sub SetImportBatchStatus {
    my ($batch_id, $new_status) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_batches SET import_status = ? WHERE import_batch_id = ?",
             undef, $new_status, $batch_id);
}
=head2 GetImportBatchOverlayAction

=over 4

my $overlay_action = GetImportBatchOverlayAction($batch_id);

=back

Returns the overlay_action of the given batch, or undef if the
batch does not exist.

=cut

sub GetImportBatchOverlayAction {
    my ($batch_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($action) = $dbh->selectrow_array(
        "SELECT overlay_action FROM import_batches WHERE import_batch_id = ?",
        undef, $batch_id);
    return $action;
}
=head2 SetImportBatchOverlayAction

=over 4

SetImportBatchOverlayAction($batch_id, $new_overlay_action);

=back

Sets the overlay_action of the given batch.

=cut

sub SetImportBatchOverlayAction {
    my ($batch_id, $new_overlay_action) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_batches SET overlay_action = ? WHERE import_batch_id = ?",
             undef, $new_overlay_action, $batch_id);
}
=head2 GetImportRecordOverlayStatus

=over 4

my $overlay_status = GetImportRecordOverlayStatus($import_record_id);

=back

Returns the overlay_status of the given staged record, or undef
if the record does not exist.

=cut

sub GetImportRecordOverlayStatus {
    my ($import_record_id) = @_;

    my $dbh = C4::Context->dbh;
    my ($status) = $dbh->selectrow_array(
        "SELECT overlay_status FROM import_records WHERE import_record_id = ?",
        undef, $import_record_id);
    return $status;
}
=head2 SetImportRecordOverlayStatus

=over 4

SetImportRecordOverlayStatus($import_record_id, $new_overlay_status);

=back

Sets the overlay_status of the given staged record.

=cut

sub SetImportRecordOverlayStatus {
    my ($import_record_id, $new_overlay_status) = @_;

    my $dbh = C4::Context->dbh;
    $dbh->do("UPDATE import_records SET overlay_status = ? WHERE import_record_id = ?",
             undef, $new_overlay_status, $import_record_id);
}
=head2 SetImportRecordMatches

=over 4

SetImportRecordMatches($import_record_id, @matches);

=back

Replaces any previously recorded match candidates for the given
staged record with the supplied list.  Each element of @matches is
a hashref with 'record_id' and 'score' keys.

=cut

sub SetImportRecordMatches {
    my ($import_record_id, @matches) = @_;

    my $dbh = C4::Context->dbh;
    # start from a clean slate: drop any previously recorded candidates
    $dbh->do("DELETE FROM import_record_matches WHERE import_record_id = ?",
             undef, $import_record_id);
    my $insert_sth = $dbh->prepare("INSERT INTO import_record_matches (import_record_id, candidate_match_id, score)
                                    VALUES (?, ?, ?)");
    foreach my $candidate (@matches) {
        $insert_sth->execute($import_record_id, $candidate->{'record_id'}, $candidate->{'score'});
    }
}
# internal functions
sub _create_import_record {

4
C4/Z3950.pm

@ -319,13 +319,13 @@ Koha Developement team <info@koha.org>
# * a "search z3950" button is added in the addbiblio template.
# * when clicked, a popup appears and z3950/search.pl is called
# * z3950/search.pl calls addz3950search in the DB
# * the z3950 daemon retrieve the records and stores them in z3950results AND in marc_breeding table.
# * the z3950 daemon retrieve the records and stores them in import_batches/import_records/import_biblios tables.
# * as long as there are searches pending, the popup auto-refreshes every 2 seconds and reports how many searches are pending.
# * when the user clicks on a z3950 result => the parent popup is called with the requested biblio, and auto-filled
#
# Note :
# * character encoding support : (It's a nightmare...) In the z3950servers table, a "encoding" column has been added. You can put "UNIMARC" or "USMARC" in this column. Depending on this, the char_decode in C4::Biblio.pm replaces marc-char-encode by an iso 8859-1 encoding. Note that in the breeding import this value has been added too, for a better support.
# * the marc_breeding and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
# * the import_records and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
#
# Revision 1.8 2003/04/29 08:09:45 tipaul
# z3950 support is coming...

20
installer/kohastructure.sql

@ -1135,26 +1135,6 @@ CREATE TABLE `letter` (
PRIMARY KEY (`module`,`code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `marc_breeding`
--
DROP TABLE IF EXISTS `marc_breeding`;
CREATE TABLE `marc_breeding` (
`id` bigint(20) NOT NULL auto_increment,
`file` varchar(80) NOT NULL default '',
`isbn` varchar(10) NOT NULL default '',
`title` varchar(128) default NULL,
`author` varchar(80) default NULL,
`marc` longblob,
`encoding` varchar(40) NOT NULL default '',
`z3950random` varchar(40) default NULL,
PRIMARY KEY (`id`),
KEY `title` (`title`),
KEY `isbn` (`isbn`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--
-- Table structure for table `marc_subfield_structure`
--

4
koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl

@ -43,9 +43,9 @@
</li>
<li>
<label for="filename">Name of this import: </label>
<label for="comments">Notes about this import: </label>
<input type="text" id="filename" name="filename" />
<input type="text" id="comments" name="comments" />
</li>
<li>

26
tools/import.pl

@ -37,6 +37,8 @@ use C4::Input;
use C4::Output;
use C4::Biblio;
use C4::Breeding;
use C4::ImportBatch;
use C4::Matcher;
#------------------
# Constants
@ -59,7 +61,7 @@ my $dbh = C4::Context->dbh;
my $uploadmarc=$input->param('uploadmarc');
my $overwrite_biblio = $input->param('overwrite_biblio');
my $filename = $input->param('filename');
my $comments = $input->param('comments');
my $syntax = $input->param('syntax');
my ($template, $loggedinuser, $cookie)
= get_template_and_user({template_name => "tools/import.tmpl",
@ -72,18 +74,26 @@ my ($template, $loggedinuser, $cookie)
$template->param(SCRIPT_NAME => $ENV{'SCRIPT_NAME'},
uploadmarc => $uploadmarc);
my $filename = $uploadmarc;
if ($uploadmarc && length($uploadmarc)>0) {
my $marcrecord='';
while (<$uploadmarc>) {
$marcrecord.=$_;
}
my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch');
$template->param(imported => $imported,
alreadyindb => $alreadyindb,
alreadyinfarm => $alreadyinfarm,
notmarcrecord => $notmarcrecord,
total => $imported+$alreadyindb+$alreadyinfarm+$notmarcrecord,
#my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch');
# FIXME branch code
my ($batch_id, $num_valid, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, $comments, '', 1);
my $matcher = C4::Matcher->new('biblio');
$matcher->add_matchpoint("020", "a", '', 'isbn', 1000);
my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher);
my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);
$template->param(imported => $num_valid,
alreadyindb => $num_with_matches,
alreadyinfarm => 0,
notmarcrecord => scalar(@import_errors),
total => $num_valid + scalar(@import_errors)
);
}

28
updater/updatedatabase

@ -423,6 +423,34 @@ if (C4::Context->preference("Version") < TransformToNum($DBversion)) {
KEY `itemnumber` (`itemnumber`),
KEY `branchcode` (`branchcode`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8");
$dbh->do("INSERT INTO `import_batches`
(`overlay_action`, `import_status`, `batch_type`, `file_name`)
SELECT distinct 'create_new', 'staged', 'z3950', `file`
FROM `marc_breeding`");
$dbh->do("INSERT INTO `import_records`
(`import_batch_id`, `record_sequence`, `marc`, `record_type`, `status`,
`encoding`, `z3950random`, `marcxml`, `marcxml_old`)
SELECT `import_batch_id`, 1, `marc`, 'biblio', 'staged', `encoding`, `z3950random`, '', ''
FROM `marc_breeding`
JOIN `import_batches` ON (`file_name` = `file`)");
$dbh->do("INSERT INTO `import_biblios`
(`import_record_id`, `title`, `author`, `isbn`)
SELECT `import_record_id`, `title`, `author`, `isbn`
FROM `marc_breeding`
JOIN `import_records` USING (`z3950random`)");
$dbh->do("UPDATE `import_batches`
SET `num_biblios` = (
SELECT COUNT(*)
FROM `import_records`
WHERE `import_batch_id` = `import_batches`.`import_batch_id`
)");
$dbh->do("DROP TABLE `marc_breeding`");
SetVersion ($DBversion);
}

Loading…
Cancel
Save