1 package C4::ImportBatch;
3 # Copyright (C) 2007 LibLime
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along with
17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
18 # Suite 330, Boston, MA 02111-1307 USA
28 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
30 # set the version for version checking
35 C4::ImportBatch - manage batches of imported MARC records
59 BatchFindBibDuplicates
63 GetImportBatchRangeDesc
64 GetNumberOfNonZ3950ImportBatches
69 GetImportBatchOverlayAction
70 SetImportBatchOverlayAction
71 GetImportRecordOverlayStatus
72 SetImportRecordOverlayStatus
75 GetImportRecordMatches
76 SetImportRecordMatches
79 =head2 GetZ3950BatchId
83 my $batchid = GetZ3950BatchId($z3950server);
87 Retrieves the ID of the import batch for the Z39.50
88 reservoir for the given target. If necessary,
89 creates the import batch.
# Find the import batch acting as the Z39.50 reservoir for $z3950server.
# The visible code queries import_batches for a batch of type z3950,
# executes the query with $z3950server as its only bind value, and tests
# whether a row came back. AddImportBatch is called with overlay action
# create_new, status staged, type z3950, the server name as the file name
# and an empty comment.
# NOTE(review): presumably that call is the fallback taken when no row
# exists; the else line and the return statements are not visible in this
# excerpt - confirm against the full file.
94 my ($z3950server) = @_;
96 my $dbh = C4::Context->dbh;
97 my $sth = $dbh->prepare("SELECT import_batch_id FROM import_batches
98 WHERE batch_type = 'z3950'
100 $sth->execute($z3950server);
101 my $rowref = $sth->fetchrow_arrayref();
103 if (defined $rowref) {
106 my $batch_id = AddImportBatch('create_new', 'staged', 'z3950', $z3950server, '');
112 =head2 GetImportRecordMarc
116 my ($marcblob, $encoding) = GetImportRecordMarc($import_record_id);
122 sub GetImportRecordMarc {
# Look up the stored MARC data and its encoding for one import record.
# Parameter: $import_record_id - key of the import_records row to read.
# Per the synopsis the caller receives the pair (marc blob, encoding);
# the return statement itself is not visible in this excerpt.
123 my ($import_record_id) = @_;
125 my $dbh = C4::Context->dbh;
126 my $sth = $dbh->prepare("SELECT marc, encoding FROM import_records WHERE import_record_id = ?");
127 $sth->execute($import_record_id);
# List assignment takes the two selected columns of the first row.
# NOTE(review): fetchrow looks like the legacy DBI alias of fetchrow_array.
128 my ($marc, $encoding) = $sth->fetchrow();
134 =head2 AddImportBatch
138 my $batch_id = AddImportBatch($overlay_action, $import_status, $type, $file_name, $comments);
# Insert a new row into import_batches and capture the id generated for it.
# Parameters, in order: overlay action, import status, batch type,
# file name and comments. All five are passed as bind values.
145 my ($overlay_action, $import_status, $type, $file_name, $comments) = @_;
147 my $dbh = C4::Context->dbh;
148 my $sth = $dbh->prepare("INSERT INTO import_batches (overlay_action, import_status, batch_type,
150 VALUES (?, ?, ?, ?, ?)");
151 $sth->execute($overlay_action, $import_status, $type, $file_name, $comments);
# The new id is read from the mysql_insertid handle attribute, which ties
# this routine to the MySQL driver.
152 my $batch_id = $dbh->{'mysql_insertid'};
159 =head2 GetImportBatch
163 my $row = GetImportBatch($batch_id);
167 Retrieve a hashref of an import_batches row.
# Fetch one import_batches row, selected by import_batch_id, as a hashref.
# NOTE(review): the unpacking of $batch_id and the execute call fall on
# lines that are not visible in this excerpt.
174 my $dbh = C4::Context->dbh;
# prepare_cached asks DBI to reuse the statement handle for this SQL text.
175 my $sth = $dbh->prepare_cached("SELECT * FROM import_batches WHERE import_batch_id = ?");
176 $sth->bind_param(1, $batch_id);
# Only the first row is read; fetchrow_hashref yields undef when none exists.
178 my $result = $sth->fetchrow_hashref;
184 =head2 AddBiblioToBatch
188 my $import_record_id = AddBiblioToBatch($batch_id, $record_sequence, $marc_record, $encoding, $z3950random, $update_counts);
194 sub AddBiblioToBatch {
# Add one bibliographic MARC record to an import batch.
# Parameters, in order: batch id, sequence number of the record within the
# batch, the MARC record object, its encoding, the z3950random value, and
# an optional flag controlling whether batch record counts are refreshed.
# Returns the id of the import record that was created.
195 my $batch_id = shift;
196 my $record_sequence = shift;
197 my $marc_record = shift;
198 my $encoding = shift;
199 my $z3950random = shift;
# The flag defaults to true when the caller passes only five arguments.
200 my $update_counts = @_ ? shift : 1;
# Store the raw record first; its new id keys the biblio field row.
202 my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'biblio', $encoding, $z3950random);
203 _add_biblio_fields($import_record_id, $marc_record);
# BatchStageMarcRecords passes a false flag per record and refreshes the
# counts once after its loop instead.
204 _update_batch_record_counts($batch_id) if $update_counts;
205 return $import_record_id;
208 =head2 ModBiblioInBatch
212 ModBiblioInBatch($import_record_id, $marc_record);
218 sub ModBiblioInBatch {
# Replace the MARC data of an existing import record and refresh the
# title, author, isbn and issn columns derived from it.
# Parameters: $import_record_id - record to modify; $marc_record - the
# replacement MARC record object.
219 my ($import_record_id, $marc_record) = @_;
# Rewrites the marc and marcxml columns of import_records.
221 _update_import_record_marc($import_record_id, $marc_record);
# Re-parses the record and rewrites the matching import_biblios row.
222 _update_biblio_fields($import_record_id, $marc_record);
226 =head2 BatchStageMarcRecords
230 ($batch_id, $num_records, @invalid_records) = BatchStageMarcRecords($marc_flavor, $marc_records, $file_name,
231 $comments, $branch_code, $leave_as_staging);
237 sub BatchStageMarcRecords {
# Stage a string of concatenated MARC records into a newly created batch.
# Parameters, in order: MARC flavor, the concatenated records, file name,
# comments, branch code, and a flag that keeps the batch in staging status.
# Returns the batch id, the count held in $num_valid, and the list of raw
# blobs that were rejected as invalid.
# NOTE(review): $rec_num and $num_valid are declared and maintained on
# lines not visible in this excerpt; $branch_code is not used on any
# visible line.
238 my ($marc_flavor, $marc_records, $file_name, $comments, $branch_code, $leave_as_staging) = @_;
# The batch is created with overlay action create_new and status staging.
240 my $batch_id = AddImportBatch('create_new', 'staging', 'batch', $file_name, $comments);
241 my @invalid_records = ();
243 # FIXME - for now, we're dealing only with bibs
# Records are split on byte 0x1D, which split removes, so the same byte is
# handed to FixEncoding alongside each blob (presumably to re-terminate it).
245 foreach my $marc_blob (split(/\x1D/, $marc_records)) {
247 my $marc_record = FixEncoding($marc_blob, "\x1D");
248 my $import_record_id;
# A parsed record with no fields is treated as invalid; its raw blob is
# collected for the caller.
249 if (scalar($marc_record->fields()) == 0) {
250 push @invalid_records, $marc_blob;
# A random integer below 99999 is stored as z3950random, and the final 0
# suppresses the per-record count refresh.
253 $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $marc_flavor, int(rand(99999)), 0);
# Promote the batch to staged unless the caller asked to leave it as is.
256 unless ($leave_as_staging) {
257 SetImportBatchStatus($batch_id, 'staged');
259 # FIXME branch_code, number of bibs, number of items
# Counts are refreshed once here for the whole batch.
260 _update_batch_record_counts($batch_id);
261 return ($batch_id, $num_valid, @invalid_records);
264 =head2 BatchFindBibDuplicates
268 my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, $max_matches);
272 Goes through the records loaded in the batch and attempts to
273 find duplicates for each one. Sets the overlay action to
274 'replace' if it was 'create_new', and sets the overlay status
275 of each record to 'no_match' or 'auto_match' as appropriate.
277 The $max_matches parameter is optional; if it is not supplied,
282 sub BatchFindBibDuplicates {
# Run a matcher over every record of a batch and record the outcome.
# Parameters, in order: batch id, matcher object, optional maximum number
# of matches to keep per record.
# Returns the value of $num_with_matches.
# NOTE(review): $matcher is unpacked, and $num_with_matches presumably
# incremented, on lines not visible in this excerpt.
283 my $batch_id = shift;
# The limit defaults to 10 when the caller omits it.
285 my $max_matches = @_ ? shift : 10;
287 my $dbh = C4::Context->dbh;
# A batch still set to create_new is switched to replace, so that matches
# found below can be applied at commit time.
288 my $old_overlay_action = GetImportBatchOverlayAction($batch_id);
289 if ($old_overlay_action eq "create_new") {
290 SetImportBatchOverlayAction($batch_id, 'replace');
# Read the id and raw MARC of each record in the batch that has an
# import_biblios row.
293 my $sth = $dbh->prepare("SELECT import_record_id, marc
295 JOIN import_biblios USING (import_record_id)
296 WHERE import_batch_id = ?");
297 $sth->execute($batch_id);
298 my $num_with_matches = 0;
299 while (my $rowref = $sth->fetchrow_hashref) {
300 my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
301 my @matches = $matcher->get_matches($marc_record, $max_matches);
# With at least one match the candidates are stored and the record is
# flagged auto_match; the remaining call flags it no_match.
302 if (scalar(@matches) > 0) {
304 SetImportRecordMatches($rowref->{'import_record_id'}, @matches);
305 SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'auto_match');
307 SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'no_match');
311 return $num_with_matches;
314 =head2 BatchCommitBibRecords
318 my ($num_added, $num_updated, $num_ignored) = BatchCommitBibRecords($batch_id);
324 sub BatchCommitBibRecords {
# Commit every bib record of a batch to the catalog.
# Parameter: batch id.
# Returns the triple (number added, number updated, number ignored).
# NOTE(review): the three counters are declared and incremented on lines
# not visible in this excerpt.
325 my $batch_id = shift;
330 # commit (i.e., save) all records in the batch
331 # FIXME biblio only at the moment
# SetImportBatchStatus unpacks (batch id, new status); the batch id must be
# passed first or the status string is used as the id and no row changes.
332 SetImportBatchStatus($batch_id, 'importing');
333 my $overlay_action = GetImportBatchOverlayAction($batch_id);
334 my $dbh = C4::Context->dbh;
335 my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marc
337 JOIN import_biblios USING (import_record_id)
338 WHERE import_batch_id = ?");
339 $sth->execute($batch_id);
340 while (my $rowref = $sth->fetchrow_hashref) {
# Records in status error or imported get separate handling.
# NOTE(review): the body of this branch is not visible; presumably it
# counts the record as ignored and moves on.
341 if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'imported') {
344 my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'});
# Create a new bib when the batch only creates records, or when it
# replaces but this record found no match.
345 if ($overlay_action eq 'create_new' or
346 ($overlay_action eq 'replace' and $rowref->{'overlay_status'} eq 'no_match')) {
348 my ($biblionumber, $biblioitemnumber) = AddBiblio($marc_record, '');
# Remember which bib was created; BatchRevertBibRecords reads this column.
349 my $sth = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?");
350 $sth->execute($biblionumber, $rowref->{'import_record_id'});
352 SetImportRecordStatus($rowref->{'import_record_id'}, 'imported');
# The remaining statements overlay the best-scoring existing bib, saving
# its previous XML in marcxml_old first so the change can be reverted.
355 my $biblionumber = GetBestRecordMatch($rowref->{'import_record_id'});
356 my ($count, $oldbiblio) = GetBiblio($biblionumber);
357 my $oldxml = GetXmlBiblio($biblionumber);
358 ModBiblio($marc_record, $biblionumber, $oldbiblio->{'frameworkcode'});
359 my $sth = $dbh->prepare_cached("UPDATE import_records SET marcxml_old = ? WHERE import_record_id = ?");
360 $sth->execute($oldxml, $rowref->{'import_record_id'});
362 my $sth2 = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?");
363 $sth2->execute($biblionumber, $rowref->{'import_record_id'});
365 SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied');
366 SetImportRecordStatus($rowref->{'import_record_id'}, 'imported');
370 SetImportBatchStatus($batch_id, 'imported');
371 return ($num_added, $num_updated, $num_ignored);
374 =head2 BatchRevertBibRecords
378 my ($num_deleted, $num_errors, $num_reverted, $num_ignored) = BatchRevertBibRecords($batch_id);
384 sub BatchRevertBibRecords {
# Undo a previously committed batch.
# Parameter: batch id.
# Returns (number deleted, number of errors, number reverted, number
# ignored).
# NOTE(review): three of the four counters are declared, and all are
# incremented, on lines not visible in this excerpt.
385 my $batch_id = shift;
389 my $num_reverted = 0;
391 # revert (i.e., undo the import of) all records in the batch
392 # FIXME biblio only at the moment
# SetImportBatchStatus unpacks (batch id, new status); the batch id must be
# passed first or the status string is used as the id and no row changes.
393 SetImportBatchStatus($batch_id, 'reverting');
394 my $overlay_action = GetImportBatchOverlayAction($batch_id);
395 my $dbh = C4::Context->dbh;
396 my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marcxml_old, encoding, matched_biblionumber
398 JOIN import_biblios USING (import_record_id)
399 WHERE import_batch_id = ?");
400 $sth->execute($batch_id);
401 while (my $rowref = $sth->fetchrow_hashref) {
# Records in status error or reverted get separate handling.
# NOTE(review): the body of this branch is not visible; presumably it
# counts the record as ignored and moves on.
402 if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'reverted') {
# Same test as at commit time: these records were added as new bibs, so
# reverting means deleting the bib recorded in matched_biblionumber.
405 if ($overlay_action eq 'create_new' or
406 ($overlay_action eq 'replace' and $rowref->{'overlay_status'} eq 'no_match')) {
407 my $error = DelBiblio($rowref->{'matched_biblionumber'});
# A defined return value from DelBiblio is treated as a failure.
408 if (defined $error) {
412 SetImportRecordStatus($rowref->{'import_record_id'}, 'reverted');
# The remaining statements restore an overlaid bib from the XML saved in
# marcxml_old at commit time.
416 my $old_record = MARC::Record->new_from_xml($rowref->{'marcxml_old'}, 'UTF-8', $rowref->{'encoding'});
417 my $biblionumber = $rowref->{'matched_biblionumber'};
418 my ($count, $oldbiblio) = GetBiblio($biblionumber);
419 ModBiblio($old_record, $biblionumber, $oldbiblio->{'frameworkcode'});
420 SetImportRecordStatus($rowref->{'import_record_id'}, 'reverted');
424 SetImportBatchStatus($batch_id, 'reverted');
425 return ($num_deleted, $num_errors, $num_reverted, $num_ignored);
428 =head2 GetImportBatchRangeDesc
432 my $results = GetImportBatchRangeDesc($offset, $results_per_group);
436 Returns a reference to an array of hash references corresponding to
437 import_batches rows (sorted in descending order by import_batch_id)
438 starting at the given offset.
442 sub GetImportBatchRangeDesc {
# Page through import_batches rows whose batch_type is batch, ordered by
# descending import_batch_id.
# Parameters: $offset - number of rows to skip; $results_per_group - page
# size. The page size is bound as the first placeholder and the offset as
# the second. Each fetched row is pushed onto $results as a hashref.
# NOTE(review): the tail of the SQL text, the execute call, the declaration
# of $results and the return fall on lines not visible in this excerpt.
443 my ($offset, $results_per_group) = @_;
445 my $dbh = C4::Context->dbh;
446 my $sth = $dbh->prepare_cached("SELECT * FROM import_batches
447 WHERE batch_type = 'batch'
448 ORDER BY import_batch_id DESC
450 $sth->bind_param(1, $results_per_group);
451 $sth->bind_param(2, $offset);
455 while (my $row = $sth->fetchrow_hashref) {
456 push @$results, $row;
462 =head2 GetNumberOfNonZ3950ImportBatches
466 my $count = GetNumberOfNonZ3950ImportBatches();
472 sub GetNumberOfNonZ3950ImportBatches {
# Count the import_batches rows whose batch_type is batch, which excludes
# the z3950 reservoir batches. Takes no parameters.
# NOTE(review): the execute call and the return are not visible here.
473 my $dbh = C4::Context->dbh;
474 my $sth = $dbh->prepare("SELECT COUNT(*) FROM import_batches WHERE batch_type='batch'");
# The single COUNT column of the single result row.
476 my ($count) = $sth->fetchrow_array();
481 =head2 GetImportBibliosRange
485 my $results = GetImportBibliosRange($batch_id, $offset, $results_per_group);
489 Returns a reference to an array of hash references corresponding to
490 import_biblios/import_records rows for a given batch
491 starting at the given offset.
495 sub GetImportBibliosRange {
# Page through the records of one batch, ordered by import_record_id.
# Parameters: $batch_id - batch to list; $offset - rows to skip;
# $results_per_group - page size.
# Each row carries title, author, isbn, issn, import_record_id,
# record_sequence, status and overlay_status.
# NOTE(review): the FROM clause, the execute call, the declaration of
# $results and the return fall on lines not visible in this excerpt.
496 my ($batch_id, $offset, $results_per_group) = @_;
498 my $dbh = C4::Context->dbh;
499 my $sth = $dbh->prepare_cached("SELECT title, author, isbn, issn, import_record_id, record_sequence,
500 status, overlay_status
502 JOIN import_biblios USING (import_record_id)
503 WHERE import_batch_id = ?
504 ORDER BY import_record_id LIMIT ? OFFSET ?");
# Placeholders in SQL order: batch id, LIMIT value, OFFSET value.
505 $sth->bind_param(1, $batch_id);
506 $sth->bind_param(2, $results_per_group);
507 $sth->bind_param(3, $offset);
510 while (my $row = $sth->fetchrow_hashref) {
511 push @$results, $row;
518 =head2 GetBestRecordMatch
522 my $record_id = GetBestRecordMatch($import_record_id);
528 sub GetBestRecordMatch {
# Pick the best stored candidate match for an import record.
# Parameter: $import_record_id.
# NOTE(review): the return statement is not visible in this excerpt.
529 my ($import_record_id) = @_;
531 my $dbh = C4::Context->dbh;
532 my $sth = $dbh->prepare("SELECT candidate_match_id
533 FROM import_record_matches
534 WHERE import_record_id = ?
535 ORDER BY score DESC, candidate_match_id DESC");
536 $sth->execute($import_record_id);
# Only the first row is read: the highest score wins, and ties go to the
# highest candidate_match_id. With no rows $record_id stays undef.
537 my ($record_id) = $sth->fetchrow_array();
542 =head2 GetImportBatchStatus
546 my $status = GetImportBatchStatus($batch_id);
552 sub GetImportBatchStatus {
# Read the import_status column of one batch.
# NOTE(review): the unpacking of $batch_id and the return are on lines not
# visible in this excerpt.
555 my $dbh = C4::Context->dbh;
# The key column of import_batches is import_batch_id, as in every other
# query against that table in this module; filtering on batch_id named a
# column no other statement uses.
556 my $sth = $dbh->prepare("SELECT import_status FROM import_batches WHERE import_batch_id = ?");
557 $sth->execute($batch_id);
558 my ($status) = $sth->fetchrow_array();
565 =head2 SetImportBatchStatus
569 SetImportBatchStatus($batch_id, $new_status);
575 sub SetImportBatchStatus {
# Set the import_status column of one batch.
# Parameters, in this order: $batch_id, then $new_status. Callers must pass
# both; a single argument would be taken as the batch id.
576 my ($batch_id, $new_status) = @_;
578 my $dbh = C4::Context->dbh;
579 my $sth = $dbh->prepare("UPDATE import_batches SET import_status = ? WHERE import_batch_id = ?");
# Bind order follows the SQL text: new value first, key second.
580 $sth->execute($new_status, $batch_id);
585 =head2 GetImportBatchOverlayAction
589 my $overlay_action = GetImportBatchOverlayAction($batch_id);
595 sub GetImportBatchOverlayAction {
# Return the overlay_action column of one batch, or undef when the query
# yields no row.
# NOTE(review): the unpacking of $batch_id is on a line not visible here.
598 my $dbh = C4::Context->dbh;
599 my $sth = $dbh->prepare("SELECT overlay_action FROM import_batches WHERE import_batch_id = ?");
600 $sth->execute($batch_id);
601 my ($overlay_action) = $sth->fetchrow_array();
603 return $overlay_action;
608 =head2 SetImportBatchOverlayAction
612 SetImportBatchOverlayAction($batch_id, $new_overlay_action);
618 sub SetImportBatchOverlayAction {
# Set the overlay_action column of one batch.
# Parameters, in order: $batch_id, $new_overlay_action.
619 my ($batch_id, $new_overlay_action) = @_;
621 my $dbh = C4::Context->dbh;
622 my $sth = $dbh->prepare("UPDATE import_batches SET overlay_action = ? WHERE import_batch_id = ?");
# Bind order follows the SQL text: new value first, key second.
623 $sth->execute($new_overlay_action, $batch_id);
628 =head2 GetImportRecordOverlayStatus
632 my $overlay_status = GetImportRecordOverlayStatus($import_record_id);
638 sub GetImportRecordOverlayStatus {
# Return the overlay_status column of one import record, or undef when the
# query yields no row.
# Parameter: $import_record_id.
639 my ($import_record_id) = @_;
641 my $dbh = C4::Context->dbh;
642 my $sth = $dbh->prepare("SELECT overlay_status FROM import_records WHERE import_record_id = ?");
643 $sth->execute($import_record_id);
644 my ($overlay_status) = $sth->fetchrow_array();
646 return $overlay_status;
651 =head2 SetImportRecordOverlayStatus
655 SetImportRecordOverlayStatus($import_record_id, $new_overlay_status);
661 sub SetImportRecordOverlayStatus {
# Set the overlay_status column of one import record.
# Parameters, in order: $import_record_id, $new_overlay_status.
662 my ($import_record_id, $new_overlay_status) = @_;
664 my $dbh = C4::Context->dbh;
665 my $sth = $dbh->prepare("UPDATE import_records SET overlay_status = ? WHERE import_record_id = ?");
# Bind order follows the SQL text: new value first, key second.
666 $sth->execute($new_overlay_status, $import_record_id);
671 =head2 GetImportRecordStatus
675 my $status = GetImportRecordStatus($import_record_id);
681 sub GetImportRecordStatus {
# Return the status column of one import record, or undef when the query
# yields no row.
# Parameter: $import_record_id.
682 my ($import_record_id) = @_;
684 my $dbh = C4::Context->dbh;
685 my $sth = $dbh->prepare("SELECT status FROM import_records WHERE import_record_id = ?");
686 $sth->execute($import_record_id);
# Despite its name this variable holds the status column, not
# overlay_status; the name is shared with GetImportRecordOverlayStatus.
687 my ($overlay_status) = $sth->fetchrow_array();
689 return $overlay_status;
694 =head2 SetImportRecordStatus
698 SetImportRecordStatus($import_record_id, $new_status);
704 sub SetImportRecordStatus {
# Set the status column of one import record.
# Parameters, in order: the import record id, then the new status value.
# Despite its name, $new_overlay_status is written to the status column,
# not to overlay_status.
705 my ($import_record_id, $new_overlay_status) = @_;
707 my $dbh = C4::Context->dbh;
708 my $sth = $dbh->prepare("UPDATE import_records SET status = ? WHERE import_record_id = ?");
# Bind order follows the SQL text: new value first, key second.
709 $sth->execute($new_overlay_status, $import_record_id);
714 =head2 GetImportRecordMatches
718 my $results = GetImportRecordMatches($import_record_id, $best_only);
724 sub GetImportRecordMatches {
# List the catalog bibs stored as candidate matches for an import record,
# best score first, with ties going to the highest biblionumber.
# Parameters: the import record id, and an optional $best_only flag.
# Each row carries title, author, biblionumber and score.
# NOTE(review): how $best_only is applied is not visible in this excerpt;
# neither are the FROM clause, the execute call, the declaration of
# $results or the return.
725 my $import_record_id = shift;
# The flag defaults to false when omitted.
726 my $best_only = @_ ? shift : 0;
728 my $dbh = C4::Context->dbh;
729 # FIXME currently biblio only
730 my $sth = $dbh->prepare_cached("SELECT title, author, biblionumber, score
732 JOIN import_record_matches USING (import_record_id)
733 JOIN biblio ON (biblionumber = candidate_match_id)
734 WHERE import_record_id = ?
735 ORDER BY score DESC, biblionumber DESC");
736 $sth->bind_param(1, $import_record_id);
739 while (my $row = $sth->fetchrow_hashref) {
740 push @$results, $row;
750 =head2 SetImportRecordMatches
754 SetImportRecordMatches($import_record_id, @matches);
760 sub SetImportRecordMatches {
# Replace the stored candidate matches of an import record.
# The first argument is the import record id. Existing rows for that record
# are deleted from import_record_matches, then one row is inserted per
# element of @matches, reading the record_id and score keys of each hashref.
# NOTE(review): @matches is populated on a line not visible in this excerpt,
# presumably from the remaining arguments; the tail of the INSERT text is
# also not visible.
761 my $import_record_id = shift;
764 my $dbh = C4::Context->dbh;
765 my $delsth = $dbh->prepare("DELETE FROM import_record_matches WHERE import_record_id = ?");
766 $delsth->execute($import_record_id);
769 my $sth = $dbh->prepare("INSERT INTO import_record_matches (import_record_id, candidate_match_id, score)
771 foreach my $match (@matches) {
772 $sth->execute($import_record_id, $match->{'record_id'}, $match->{'score'});
779 sub _create_import_record {
# Insert one import_records row for a MARC record and return its new id.
# Parameters, in order: batch id, sequence number, MARC record object,
# record type, encoding, z3950random value.
780 my ($batch_id, $record_sequence, $marc_record, $record_type, $encoding, $z3950random) = @_;
782 my $dbh = C4::Context->dbh;
# The record is stored twice, serialized by as_usmarc and by as_xml.
783 my $sth = $dbh->prepare("INSERT INTO import_records (import_batch_id, record_sequence, marc, marcxml,
784 record_type, encoding, z3950random)
785 VALUES (?, ?, ?, ?, ?, ?, ?)");
786 $sth->execute($batch_id, $record_sequence, $marc_record->as_usmarc(), $marc_record->as_xml(),
787 $record_type, $encoding, $z3950random);
# The new id is read from the mysql_insertid handle attribute, which ties
# this routine to the MySQL driver.
788 my $import_record_id = $dbh->{'mysql_insertid'};
790 return $import_record_id;
793 sub _update_import_record_marc {
# Overwrite the marc and marcxml columns of an existing import record with
# fresh serializations of $marc_record.
# Parameters, in order: $import_record_id, $marc_record.
794 my ($import_record_id, $marc_record) = @_;
796 my $dbh = C4::Context->dbh;
797 my $sth = $dbh->prepare("UPDATE import_records SET marc = ?, marcxml = ?
798 WHERE import_record_id = ?");
799 $sth->execute($marc_record->as_usmarc(), $marc_record->as_xml(), $import_record_id);
803 sub _add_biblio_fields {
# Insert the import_biblios row that accompanies an import record, holding
# the title, author, isbn and issn parsed from the MARC record.
# Parameters, in order: $import_record_id, $marc_record.
804 my ($import_record_id, $marc_record) = @_;
806 my ($title, $author, $isbn, $issn) = _parse_biblio_fields($marc_record);
807 my $dbh = C4::Context->dbh;
808 # FIXME no controlnumber, originalsource
809 # FIXME 2 - should regularize normalization of ISBN wherever it is done
# NOTE(review): lines between the FIXME above and the INSERT are not visible
# here; presumably they normalize $isbn - confirm.
813 my $sth = $dbh->prepare("INSERT INTO import_biblios (import_record_id, title, author, isbn, issn) VALUES (?, ?, ?, ?, ?)");
814 $sth->execute($import_record_id, $title, $author, $isbn, $issn);
819 sub _update_biblio_fields {
# Rewrite the title, author, isbn and issn columns of the import_biblios
# row for an import record from a freshly parsed MARC record.
# Parameters, in order: $import_record_id, $marc_record.
820 my ($import_record_id, $marc_record) = @_;
822 my ($title, $author, $isbn, $issn) = _parse_biblio_fields($marc_record);
823 my $dbh = C4::Context->dbh;
824 # FIXME no controlnumber, originalsource
825 # FIXME 2 - should regularize normalization of ISBN wherever it is done
# NOTE(review): lines between the FIXME above and the UPDATE are not visible
# here; presumably they normalize $isbn - confirm.
829 my $sth = $dbh->prepare("UPDATE import_biblios SET title = ?, author = ?, isbn = ?, issn = ?
830 WHERE import_record_id = ?");
831 $sth->execute($title, $author, $isbn, $issn, $import_record_id);
835 sub _parse_biblio_fields {
# Extract the four summary fields kept in import_biblios from a MARC record.
# Parameter: $marc_record.
# Returns the list (title, author, isbn, issn), taken from the hashref that
# TransformMarcToKoha produces; a key it does not supply comes back as undef.
836 my ($marc_record) = @_;
838 my $dbh = C4::Context->dbh;
# The empty string is the third argument of TransformMarcToKoha.
# NOTE(review): presumably it selects the default framework - confirm.
839 my $bibliofields = TransformMarcToKoha($dbh, $marc_record, '');
840 return ($bibliofields->{'title'}, $bibliofields->{'author'}, $bibliofields->{'isbn'}, $bibliofields->{'issn'});
844 sub _update_batch_record_counts {
# Recompute the num_biblios column of one batch from a subquery restricted
# to rows of record_type biblio belonging to the same batch.
# NOTE(review): the unpacking of $batch_id, the SELECT part of the subquery
# and the execute call fall on lines not visible in this excerpt.
847 my $dbh = C4::Context->dbh;
848 my $sth = $dbh->prepare_cached("UPDATE import_batches SET num_biblios = (
851 WHERE import_batch_id = import_batches.import_batch_id
852 AND record_type = 'biblio')
853 WHERE import_batch_id = ?");
854 $sth->bind_param(1, $batch_id);
864 Koha Development Team <info@koha.org>
866 Galen Charlton <galen.charlton@liblime.com>