From 2e0798336711ddbf7ca5e35766686d34ea01db68 Mon Sep 17 00:00:00 2001
From: Galen Charlton
Date: Mon, 29 Oct 2007 16:45:13 -0500
Subject: [PATCH] more work on batch import

* Completely removed old marc_breeding table
* Started updating the Tools import function to stage records

Signed-off-by: Chris Cormack
Signed-off-by: Joshua Ferraro
---
 C4/Breeding.pm                                |   8 +-
 C4/ImportBatch.pm                             | 339 +++++++++++++++++-
 C4/Z3950.pm                                   |   4 +-
 installer/kohastructure.sql                   |  20 --
 .../prog/en/modules/tools/import.tmpl         |   4 +-
 tools/import.pl                               |  26 +-
 updater/updatedatabase                        |  28 ++
 7 files changed, 393 insertions(+), 36 deletions(-)

diff --git a/C4/Breeding.pm b/C4/Breeding.pm
index d21ae31dc1..0ec0829f06 100644
--- a/C4/Breeding.pm
+++ b/C4/Breeding.pm
@@ -31,7 +31,8 @@ $VERSION = 0.01;

 =head1 NAME

-C4::Breeding : script to add a biblio in marc_breeding table.
+C4::Breeding : module to add biblios to import_records via
+ the breeding/reservoir API.

 =head1 SYNOPSIS

 =head1 DESCRIPTION

- ImportBreeding import MARC records in the reservoir (marc_breeding table).
+ ImportBreeding imports MARC records into the reservoir (import_records/import_batches tables).
 the records can be properly encoded or not, we try to reencode them in utf-8 if needed.
 works perfectly with BNF server, that sends UNIMARC latin1 records. Should work with other servers too.
 the FixEncoding sub is in Koha.pm, as it's a general usage sub.

@@ -161,7 +162,8 @@ C<$isbn> contains isbn or issn,
 C<$random> contains the random seed from a z3950 search.

 C<$count> is the number of items in C<@results>. C<@results> is an
-array of references-to-hash; the keys are the items from the C<marc_breeding> table of the Koha database.
+array of references-to-hash; the keys are the items from the C<import_records> and
+C<import_biblios> tables of the Koha database.
=cut

diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm
index 056e06bf21..b2b77b0a8c 100644
--- a/C4/ImportBatch.pm
+++ b/C4/ImportBatch.pm
@@ -21,6 +21,7 @@ use strict;
 use C4::Context;
 use C4::Koha;
 use C4::Biblio;
+use C4::Matcher;

 require Exporter;

@@ -52,6 +53,18 @@ use C4::ImportBatch;
     AddImportBatch
     AddBiblioToBatch
     ModBiblioInBatch
+
+    BatchStageMarcRecords
+    BatchFindBibDuplicates
+    BatchCommitBibRecords
+
+    GetImportBatchStatus
+    SetImportBatchStatus
+    GetImportBatchOverlayAction
+    SetImportBatchOverlayAction
+    GetImportRecordOverlayStatus
+    SetImportRecordOverlayStatus
+    SetImportRecordMatches
 );

 =head2 GetZ3950BatchId

@@ -140,12 +153,14 @@ sub AddImportBatch {

 my $import_record_id = AddBiblioToBatch($batch_id, $record_sequence, $marc_record, $encoding, $z3950random);

+=back
+
 =cut

 sub AddBiblioToBatch {
     my ($batch_id, $record_sequence, $marc_record, $encoding, $z3950random) = @_;

-    my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'bib', $encoding, $z3950random);
+    my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'biblio', $encoding, $z3950random);
     _add_biblio_fields($import_record_id, $marc_record);
     return $import_record_id;
 }
@@ -156,6 +171,8 @@ sub AddBiblioToBatch {

 ModBiblioInBatch($import_record_id, $marc_record);

+=back
+
 =cut

 sub ModBiblioInBatch {
@@ -166,6 +183,326 @@ sub ModBiblioInBatch {

 }

+=head2 BatchStageMarcRecords
+
+=over 4
+
+($batch_id, $num_records, @invalid_records) = BatchStageMarcRecords($marc_flavor, $marc_records, $file_name,
+                                                                    $comments, $branch_code, $leave_as_staging);
+
+=back
+
+=cut
+
+sub BatchStageMarcRecords {
+    my ($marc_flavor, $marc_records, $file_name, $comments, $branch_code, $leave_as_staging) = @_;
+
+    my $batch_id = AddImportBatch('create_new', 'staging', 'batch', $file_name, $comments);
+    my @invalid_records = ();
+    my $num_valid = 0;
+    # FIXME - for now, we're dealing only with bibs
+    my $rec_num = 0;
+    foreach my $marc_blob (split(/\x1D/, $marc_records)) {
+        $rec_num++;
+        my $marc_record = FixEncoding($marc_blob, "\x1D");
+        my $import_record_id;
+        if (scalar($marc_record->fields()) == 0) {
+            push @invalid_records, $marc_blob;
+        } else {
+            $num_valid++;
+            $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $marc_flavor, int(rand(99999)));
+        }
+    }
+    unless ($leave_as_staging) {
+        SetImportBatchStatus($batch_id, 'staged');
+    }
+    # FIXME batch_code, number of bibs, number of items
+    return ($batch_id, $num_valid, @invalid_records);
+}
+
+=head2 BatchFindBibDuplicates
+
+=over 4
+
+my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches);
+
+=back
+
+Goes through the records loaded in the batch and attempts to
+find duplicates for each one. Sets the overlay action to
+'replace' if it was 'create_new', and sets the overlay status
+of each record to 'no_match' or 'auto_match' as appropriate.
+
+The $max_matches parameter is optional; if it is not supplied,
+it defaults to 10.
+
+=cut
+
+sub BatchFindBibDuplicates {
+    my $batch_id = shift;
+    my $matcher = shift;
+    my $max_matches = @_ ? shift : 10;
+
+    my $dbh = C4::Context->dbh;
+    my $old_overlay_action = GetImportBatchOverlayAction($batch_id);
+    if ($old_overlay_action eq "create_new") {
+        SetImportBatchOverlayAction($batch_id, 'replace');
+    }
+
+    my $sth = $dbh->prepare("SELECT import_record_id, marc
+                             FROM import_records
+                             JOIN import_biblios USING (import_record_id)
+                             WHERE import_batch_id = ?");
+    $sth->execute($batch_id);
+    my $num_with_matches = 0;
+    while (my $rowref = $sth->fetchrow_hashref) {
+        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
+        my @matches = $matcher->get_matches($marc_record, $max_matches);
+        if (scalar(@matches) > 0) {
+            $num_with_matches++;
+            SetImportRecordMatches($rowref->{'import_record_id'}, @matches);
+            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'auto_match');
+        } else {
+            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'no_match');
+        }
+    }
+    $sth->finish();
+    return $num_with_matches;
+}
+
+=head2 BatchCommitBibRecords
+
+=over 4
+
+my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);
+
+=back
+
+=cut
+
+sub BatchCommitBibRecords {
+    my $batch_id = shift;
+
+    my $num_added = 0;
+    my $num_updated = 0;
+    my $num_ignored = 0;
+    # commit (i.e., save) all records in the batch
+    # FIXME biblio only at the moment
+    SetImportBatchStatus($batch_id, 'importing');
+    my $overlay_action = GetImportBatchOverlayAction($batch_id);
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marc
+                             FROM import_records
+                             JOIN import_biblios USING (import_record_id)
+                             WHERE import_batch_id = ?");
+    $sth->execute($batch_id);
+    while (my $rowref = $sth->fetchrow_hashref) {
+        if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'imported') {
+            $num_ignored++;
+            next;
+        }
+        my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
+        if ($overlay_action eq 'create_new' or
+            ($overlay_action eq 'replace' and $rowref->{'overlay_status'} eq 'no_match')) {
+            $num_added++;
+            my ($biblionumber, $biblioitemnumber) = AddBiblio($marc_record, '');
+        } else {
+            $num_updated++;
+            my $biblionumber = GetBestRecordMatch($rowref->{'import_record_id'});
+            my ($count, $oldbiblio) = GetBiblio($biblionumber);
+            my $oldxml = GetXmlBiblio($biblionumber);
+            ModBiblio($marc_record, $biblionumber, $oldbiblio->{'frameworkcode'});
+            my $dbh = C4::Context->dbh;
+            my $sth = $dbh->prepare("UPDATE import_records SET marcxml_old = ? WHERE import_record_id = ?");
+            $sth->execute($oldxml, $rowref->{'import_record_id'});
+            $sth->finish();
+            SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied');
+        }
+    }
+    $sth->finish();
+    SetImportBatchStatus($batch_id, 'imported');
+    return ($num_added, $num_updated, $num_ignored);
+}
+
+=head2 GetBestRecordMatch
+
+=over 4
+
+my $record_id = GetBestRecordMatch($import_record_id);
+
+=back
+
+=cut
+
+sub GetBestRecordMatch {
+    my ($import_record_id) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("SELECT candidate_match_id
+                             FROM import_record_matches
+                             WHERE import_record_id = ?
+                             ORDER BY score DESC, candidate_match_id DESC");
+    $sth->execute($import_record_id);
+    my ($record_id) = $sth->fetchrow_array();
+    $sth->finish();
+    return $record_id;
+}
+
+=head2 GetImportBatchStatus
+
+=over 4
+
+my $status = GetImportBatchStatus($batch_id);
+
+=back
+
+=cut
+
+sub GetImportBatchStatus {
+    my ($batch_id) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("SELECT import_status FROM import_batches WHERE import_batch_id = ?");
+    $sth->execute($batch_id);
+    my ($status) = $sth->fetchrow_array();
+    $sth->finish();
+    return $status;
+
+}
+
+
+=head2 SetImportBatchStatus
+
+=over 4
+
+SetImportBatchStatus($batch_id, $new_status);
+
+=back
+
+=cut
+
+sub SetImportBatchStatus {
+    my ($batch_id, $new_status) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("UPDATE import_batches SET import_status = ? WHERE import_batch_id = ?");
+    $sth->execute($new_status, $batch_id);
+    $sth->finish();
+
+}
+
+=head2 GetImportBatchOverlayAction
+
+=over 4
+
+my $overlay_action = GetImportBatchOverlayAction($batch_id);
+
+=back
+
+=cut
+
+sub GetImportBatchOverlayAction {
+    my ($batch_id) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("SELECT overlay_action FROM import_batches WHERE import_batch_id = ?");
+    $sth->execute($batch_id);
+    my ($overlay_action) = $sth->fetchrow_array();
+    $sth->finish();
+    return $overlay_action;
+
+}
+
+
+=head2 SetImportBatchOverlayAction
+
+=over 4
+
+SetImportBatchOverlayAction($batch_id, $new_overlay_action);
+
+=back
+
+=cut
+
+sub SetImportBatchOverlayAction {
+    my ($batch_id, $new_overlay_action) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("UPDATE import_batches SET overlay_action = ? WHERE import_batch_id = ?");
+    $sth->execute($new_overlay_action, $batch_id);
+    $sth->finish();
+
+}
+
+=head2 GetImportRecordOverlayStatus
+
+=over 4
+
+my $overlay_status = GetImportRecordOverlayStatus($import_record_id);
+
+=back
+
+=cut
+
+sub GetImportRecordOverlayStatus {
+    my ($import_record_id) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("SELECT overlay_status FROM import_records WHERE import_record_id = ?");
+    $sth->execute($import_record_id);
+    my ($overlay_status) = $sth->fetchrow_array();
+    $sth->finish();
+    return $overlay_status;
+
+}
+
+
+=head2 SetImportRecordOverlayStatus
+
+=over 4
+
+SetImportRecordOverlayStatus($import_record_id, $new_overlay_status);
+
+=back
+
+=cut
+
+sub SetImportRecordOverlayStatus {
+    my ($import_record_id, $new_overlay_status) = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("UPDATE import_records SET overlay_status = ?
+                             WHERE import_record_id = ?");
+    $sth->execute($new_overlay_status, $import_record_id);
+    $sth->finish();
+
+}
+
+=head2 SetImportRecordMatches
+
+=over 4
+
+SetImportRecordMatches($import_record_id, @matches);
+
+=back
+
+=cut
+
+sub SetImportRecordMatches {
+    my $import_record_id = shift;
+    my @matches = @_;
+
+    my $dbh = C4::Context->dbh;
+    my $delsth = $dbh->prepare("DELETE FROM import_record_matches WHERE import_record_id = ?");
+    $delsth->execute($import_record_id);
+    $delsth->finish();
+
+    my $sth = $dbh->prepare("INSERT INTO import_record_matches (import_record_id, candidate_match_id, score)
+                             VALUES (?, ?, ?)");
+    foreach my $match (@matches) {
+        $sth->execute($import_record_id, $match->{'record_id'}, $match->{'score'});
+    }
+}
+
+
 # internal functions

 sub _create_import_record {

diff --git a/C4/Z3950.pm b/C4/Z3950.pm
index 32f4421fba..ecc3392023 100644
--- a/C4/Z3950.pm
+++ b/C4/Z3950.pm
@@ -319,13 +319,13 @@ Koha Developement team
 # * a "search z3950" button is added in the addbiblio template.
 # * when clicked, a popup appears and z3950/search.pl is called
 # * z3950/search.pl calls addz3950search in the DB
-# * the z3950 daemon retrieve the records and stores them in z3950results AND in marc_breeding table.
+# * the z3950 daemon retrieves the records and stores them in the import_batches/import_records/import_biblios tables.
 # * as long as there as searches pending, the popup auto refresh every 2 seconds, and says how many searches are pending.
 # * when the user clicks on a z3950 result => the parent popup is called with the requested biblio, and auto-filled
 #
 # Note :
 # * character encoding support : (It's a nightmare...) In the z3950servers table, a "encoding" column has been added. You can put "UNIMARC" or "USMARC" in this column. Depending on this, the char_decode in C4::Biblio.pm replaces marc-char-encode by an iso 8859-1 encoding. Note that in the breeding import this value has been added too, for a better support.
-# * the marc_breeding and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
+# * the import_records and z3950* tables have been modified : they have an encoding column and the random z3950 number is stored too for convenience => it's the key I use to list only requested biblios in the popup.
 #
 # Revision 1.8 2003/04/29 08:09:45 tipaul
 # z3950 support is coming...
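The three Batch* functions added above make up the new staged-import workflow: stage raw MARC into an import batch, look for duplicates of each staged record, then commit the batch. A minimal standalone sketch of that sequence follows; it is illustrative only, and the input file name, the 'MARC21' flavor and the ISBN matchpoint are assumptions rather than part of this patch (tools/import.pl below wires up the same calls from CGI):

    #!/usr/bin/perl
    # Illustrative driver for the staging API added above; not part of this patch.
    use strict;
    use C4::ImportBatch;
    use C4::Matcher;

    my $file = 'incoming.mrc';                     # example path only
    open my $fh, '<', $file or die "cannot open $file: $!";
    my $marc_records = do { local $/; <$fh> };     # slurp raw ISO2709 records
    close $fh;

    # stage the records into a new import batch, leaving it in 'staging' status
    my ($batch_id, $num_valid, @invalid) =
        BatchStageMarcRecords('MARC21', $marc_records, $file, 'sample load', '', 1);

    # look for existing bibs that match each staged record, using the same
    # ISBN matchpoint that tools/import.pl sets up later in this patch
    my $matcher = C4::Matcher->new('biblio');
    $matcher->add_matchpoint('020', 'a', '', 'isbn', 1000);
    my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher);

    # commit: unmatched records are added as new bibs, matched records are
    # overlaid according to the batch overlay action
    my ($added, $updated, $ignored) = BatchCommitBibRecords($batch_id);

    print "staged $num_valid, rejected ", scalar(@invalid),
          ", matched $num_with_matches, added $added, updated $updated, ignored $ignored\n";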
diff --git a/installer/kohastructure.sql b/installer/kohastructure.sql index 7390e0164b..58ed94ffbc 100644 --- a/installer/kohastructure.sql +++ b/installer/kohastructure.sql @@ -1135,26 +1135,6 @@ CREATE TABLE `letter` ( PRIMARY KEY (`module`,`code`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; - --- --- Table structure for table `marc_breeding` --- - -DROP TABLE IF EXISTS `marc_breeding`; -CREATE TABLE `marc_breeding` ( - `id` bigint(20) NOT NULL auto_increment, - `file` varchar(80) NOT NULL default '', - `isbn` varchar(10) NOT NULL default '', - `title` varchar(128) default NULL, - `author` varchar(80) default NULL, - `marc` longblob, - `encoding` varchar(40) NOT NULL default '', - `z3950random` varchar(40) default NULL, - PRIMARY KEY (`id`), - KEY `title` (`title`), - KEY `isbn` (`isbn`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; - -- -- Table structure for table `marc_subfield_structure` -- diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl index f2363ad9b3..ada3c46e3b 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/tools/import.tmpl @@ -43,9 +43,9 @@
  • - + - +
  • diff --git a/tools/import.pl b/tools/import.pl index 07a4b100cc..9932f1bf16 100755 --- a/tools/import.pl +++ b/tools/import.pl @@ -37,6 +37,8 @@ use C4::Input; use C4::Output; use C4::Biblio; use C4::Breeding; +use C4::ImportBatch; +use C4::Matcher; #------------------ # Constants @@ -59,7 +61,7 @@ my $dbh = C4::Context->dbh; my $uploadmarc=$input->param('uploadmarc'); my $overwrite_biblio = $input->param('overwrite_biblio'); -my $filename = $input->param('filename'); +my $comments = $input->param('comments'); my $syntax = $input->param('syntax'); my ($template, $loggedinuser, $cookie) = get_template_and_user({template_name => "tools/import.tmpl", @@ -72,18 +74,26 @@ my ($template, $loggedinuser, $cookie) $template->param(SCRIPT_NAME => $ENV{'SCRIPT_NAME'}, uploadmarc => $uploadmarc); +my $filename = $uploadmarc; if ($uploadmarc && length($uploadmarc)>0) { my $marcrecord=''; while (<$uploadmarc>) { $marcrecord.=$_; } - my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch'); - - $template->param(imported => $imported, - alreadyindb => $alreadyindb, - alreadyinfarm => $alreadyinfarm, - notmarcrecord => $notmarcrecord, - total => $imported+$alreadyindb+$alreadyinfarm+$notmarcrecord, + #my ($notmarcrecord,$alreadyindb,$alreadyinfarm,$imported) = ImportBreeding($marcrecord,$overwrite_biblio,$filename,$syntax,int(rand(99999)), 'batch'); + + # FIXME branch code + my ($batch_id, $num_valid, @import_errors) = BatchStageMarcRecords($syntax, $marcrecord, $filename, $comments, '', 1); + my $matcher = C4::Matcher->new('biblio'); + $matcher->add_matchpoint("020", "a", '', 'isbn', 1000); + my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher); + my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id); + + $template->param(imported => $num_valid, + alreadyindb => $num_with_matches, + alreadyinfarm => 0, + notmarcrecord => scalar(@import_errors), + total => $num_valid + scalar(@import_errors) ); } diff --git a/updater/updatedatabase b/updater/updatedatabase index f8f7c59fd4..4f292ced7c 100755 --- a/updater/updatedatabase +++ b/updater/updatedatabase @@ -423,6 +423,34 @@ if (C4::Context->preference("Version") < TransformToNum($DBversion)) { KEY `itemnumber` (`itemnumber`), KEY `branchcode` (`branchcode`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8"); + + $dbh->do("INSERT INTO `import_batches` + (`overlay_action`, `import_status`, `batch_type`, `file_name`) + SELECT distinct 'create_new', 'staged', 'z3950', `file` + FROM `marc_breeding`"); + + $dbh->do("INSERT INTO `import_records` + (`import_batch_id`, `record_sequence`, `marc`, `record_type`, `status`, + `encoding`, `z3950random`, `marcxml`, `marcxml_old`) + SELECT `import_batch_id`, 1, `marc`, 'biblio', 'staged', `encoding`, `z3950random`, '', '' + FROM `marc_breeding` + JOIN `import_batches` ON (`file_name` = `file`)"); + + $dbh->do("INSERT INTO `import_biblios` + (`import_record_id`, `title`, `author`, `isbn`) + SELECT `import_record_id`, `title`, `author`, `isbn` + FROM `marc_breeding` + JOIN `import_records` USING (`z3950random`)"); + + $dbh->do("UPDATE `import_batches` + SET `num_biblios` = ( + SELECT COUNT(*) + FROM `import_records` + WHERE `import_batch_id` = `import_batches`.`import_batch_id` + )"); + + $dbh->do("DROP TABLE `marc_breeding`"); + SetVersion ($DBversion); } -- 2.20.1
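The updatedatabase hunk above copies every marc_breeding row into the new import_batches/import_records/import_biblios structure and only then drops the old table. A cautious upgrader could sanity-check the copy between the INSERT statements and the DROP TABLE with a snippet along these lines; it is an illustrative check, not part of the updater, and it assumes the import tables were empty before the migration ran:

    #!/usr/bin/perl
    # Illustrative post-migration check; not part of this patch.
    use strict;
    use C4::Context;

    my $dbh = C4::Context->dbh;

    # rows still in the old reservoir table (before DROP TABLE)
    my ($breeding_count) = $dbh->selectrow_array("SELECT COUNT(*) FROM marc_breeding");

    # rows carried over into the new structure by the z3950 migration batches
    my ($import_count) = $dbh->selectrow_array(
        "SELECT COUNT(*) FROM import_records
          JOIN import_batches USING (import_batch_id)
         WHERE batch_type = 'z3950'");

    if ($breeding_count == $import_count) {
        print "OK: $import_count reservoir records migrated\n";
    } else {
        warn "mismatch: $breeding_count marc_breeding rows vs $import_count import_records rows\n";
    }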