From 6e71b80ca39e6f1d2c31659a9a656de807ef3f32 Mon Sep 17 00:00:00 2001 From: Jared Camins-Esakov Date: Mon, 25 Jun 2012 11:34:59 -0400 Subject: [PATCH] Bug 7475: Teach matching rules to handle authorities * Add the code necessary to handle authorities with matching rules and import batches. * Update all the scripts that use the matcher and import batch code to use the new API. * Add authority records to the matching rules interface in the staff client. http://bugs.koha-community.org/show_bug.cgi?id=2060 Signed-off-by: Elliott Davis Signed-off-by: Jared Camins-Esakov Rebased on latest master 11 September 2012 --- C4/AuthoritiesMarc.pm | 64 ++++ C4/ImportBatch.pm | 350 ++++++++++++------ C4/Matcher.pm | 78 +++- acqui/addorderiso2709.pl | 6 +- admin/matching-rules.pl | 12 +- .../prog/en/modules/admin/matching-rules.tt | 11 + misc/cronjobs/import_webservice_batch.pl | 2 +- svc/import_bib | 2 +- tools/manage-marc-import.pl | 87 +++-- tools/stage-marc-import.pl | 4 +- 10 files changed, 436 insertions(+), 180 deletions(-) diff --git a/C4/AuthoritiesMarc.pm b/C4/AuthoritiesMarc.pm index 79f3956f34..9895e195ea 100644 --- a/C4/AuthoritiesMarc.pm +++ b/C4/AuthoritiesMarc.pm @@ -55,6 +55,7 @@ BEGIN { &BuildSummary &BuildUnimarcHierarchies &BuildUnimarcHierarchy + &GetAuthorizedHeading &merge &FindDuplicateAuthority @@ -1165,6 +1166,69 @@ sub BuildSummary { return \%summary; } +=head2 GetAuthorizedHeading + + $heading = &GetAuthorizedHeading({ record => $record, authid => $authid }) + +Takes a MARC::Record object describing an authority record or an authid, and +returns a string representation of the first authorized heading. This routine +should be considered a temporary shim to ease the future migration of authority +data from C4::AuthoritiesMarc to the object-oriented Koha::*::Authority. + +=cut + +sub GetAuthorizedHeading { + my $args = shift; + my $record; + unless ($record = $args->{record}) { + return unless $args->{authid}; + $record = GetAuthority($args->{authid}); + } + if (C4::Context->preference('marcflavour') eq 'UNIMARC') { +# construct UNIMARC summary, that is quite different from MARC21 one +# accepted form + foreach my $field ($record->field('2..')) { + return $field->as_string('abcdefghijlmnopqrstuvwxyz'); + } + } else { + foreach my $field ($record->field('1..')) { + my $tag = $field->tag(); + next if "152" eq $tag; +# FIXME - 152 is not a good tag to use +# in MARC21 -- purely local tags really ought to be +# 9XX + if ($tag eq '100') { + return $field->as_string('abcdefghjklmnopqrstvxyz68'); + } elsif ($tag eq '110') { + return $field->as_string('abcdefghklmnoprstvxyz68'); + } elsif ($tag eq '111') { + return $field->as_string('acdefghklnpqstvxyz68'); + } elsif ($tag eq '130') { + return $field->as_string('adfghklmnoprstvxyz68'); + } elsif ($tag eq '148') { + return $field->as_string('abvxyz68'); + } elsif ($tag eq '150') { + return $field->as_string('abvxyz68'); + } elsif ($tag eq '151') { + return $field->as_string('avxyz68'); + } elsif ($tag eq '155') { + return $field->as_string('abvxyz68'); + } elsif ($tag eq '180') { + return $field->as_string('vxyz68'); + } elsif ($tag eq '181') { + return $field->as_string('vxyz68'); + } elsif ($tag eq '182') { + return $field->as_string('vxyz68'); + } elsif ($tag eq '185') { + return $field->as_string('vxyz68'); + } else { + return $field->as_string(); + } + } + } + return; +} + =head2 BuildUnimarcHierarchies $text= &BuildUnimarcHierarchies( $authid, $force) diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm index 8cfb3e6b58..5149f9bff2 100644 --- a/C4/ImportBatch.pm +++ b/C4/ImportBatch.pm @@ -1,6 +1,6 @@ package C4::ImportBatch; -# Copyright (C) 2007 LibLime +# Copyright (C) 2007 LibLime, 2012 C & P Bibliography Services # # This file is part of Koha. # @@ -25,6 +25,7 @@ use C4::Koha; use C4::Biblio; use C4::Items; use C4::Charset; +use C4::AuthoritiesMarc; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @@ -40,21 +41,23 @@ BEGIN { GetImportRecordMarcXML AddImportBatch GetImportBatch + AddAuthToBatch AddBiblioToBatch AddItemsToImportBiblio + ModAuthorityInBatch ModBiblioInBatch BatchStageMarcRecords - BatchFindBibDuplicates - BatchCommitBibRecords - BatchRevertBibRecords + BatchFindDuplicates + BatchCommitRecords + BatchRevertRecords CleanBatch GetAllImportBatches GetStagedWebserviceBatches GetImportBatchRangeDesc GetNumberOfNonZ3950ImportBatches - GetImportBibliosRange + GetImportRecordsRange GetItemNumbersFromImportBatch GetImportBatchStatus @@ -272,10 +275,44 @@ sub ModBiblioInBatch { } +=head2 AddAuthToBatch + + my $import_record_id = AddAuthToBatch($batch_id, $record_sequence, + $marc_record, $encoding, $z3950random, $update_counts); + +=cut + +sub AddAuthToBatch { + my $batch_id = shift; + my $record_sequence = shift; + my $marc_record = shift; + my $encoding = shift; + my $z3950random = shift; + my $update_counts = @_ ? shift : 1; + + my $import_record_id = _create_import_record($batch_id, $record_sequence, $marc_record, 'auth', $encoding, $z3950random); + _add_auth_fields($import_record_id, $marc_record); + _update_batch_record_counts($batch_id) if $update_counts; + return $import_record_id; +} + +=head2 ModAuthInBatch + + ModAuthInBatch($import_record_id, $marc_record); + +=cut + +sub ModAuthInBatch { + my ($import_record_id, $marc_record) = @_; + + _update_import_record_marc($import_record_id, $marc_record); + +} + =head2 BatchStageMarcRecords ($batch_id, $num_records, $num_items, @invalid_records) = - BatchStageMarcRecords($encoding, $marc_records, $file_name, + BatchStageMarcRecords($record_type, $encoding, $marc_records, $file_name, $comments, $branch_code, $parse_items, $leave_as_staging, $progress_interval, $progress_callback); @@ -283,6 +320,7 @@ sub ModBiblioInBatch { =cut sub BatchStageMarcRecords { + my $record_type = shift; my $encoding = shift; my $marc_records = shift; my $file_name = shift; @@ -338,10 +376,14 @@ sub BatchStageMarcRecords { push @invalid_records, $marc_blob; } else { $num_valid++; - $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0); - if ($parse_items) { - my @import_items_ids = AddItemsToImportBiblio($batch_id, $import_record_id, $marc_record, 0); - $num_items += scalar(@import_items_ids); + if ($record_type eq 'biblio') { + $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0); + if ($parse_items) { + my @import_items_ids = AddItemsToImportBiblio($batch_id, $import_record_id, $marc_record, 0); + $num_items += scalar(@import_items_ids); + } + } elsif ($record_type eq 'auth') { + $import_record_id = AddAuthToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0); } } } @@ -392,9 +434,9 @@ sub AddItemsToImportBiblio { return @import_items_ids; } -=head2 BatchFindBibDuplicates +=head2 BatchFindDuplicates - my $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, + my $num_with_matches = BatchFindDuplicates($batch_id, $matcher, $max_matches, $progress_interval, $progress_callback); Goes through the records loaded in the batch and attempts to @@ -412,7 +454,7 @@ singular argument. =cut -sub BatchFindBibDuplicates { +sub BatchFindDuplicates { my $batch_id = shift; my $matcher = shift; my $max_matches = @_ ? shift : 10; @@ -430,9 +472,8 @@ sub BatchFindBibDuplicates { my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("SELECT import_record_id, marc + my $sth = $dbh->prepare("SELECT import_record_id, record_type, marc FROM import_records - JOIN import_biblios USING (import_record_id) WHERE import_batch_id = ?"); $sth->execute($batch_id); my $num_with_matches = 0; @@ -460,15 +501,15 @@ sub BatchFindBibDuplicates { return $num_with_matches; } -=head2 BatchCommitBibRecords +=head2 BatchCommitRecords - my ($num_added, $num_updated, $num_items_added, $num_items_errored, - $num_ignored) = BatchCommitBibRecords($batch_id, $framework, - $progress_interval, $progress_callback); + my ($num_added, $num_updated, $num_items_added, $num_items_errored, $num_ignored) = + BatchCommitRecords($batch_id, $framework, + $progress_interval, $progress_callback); =cut -sub BatchCommitBibRecords { +sub BatchCommitRecords { my $batch_id = shift; my $framework = shift; @@ -483,25 +524,29 @@ sub BatchCommitBibRecords { $progress_interval = 0 unless 'CODE' eq ref $progress_callback; } + my $record_type; my $num_added = 0; my $num_updated = 0; my $num_items_added = 0; my $num_items_errored = 0; my $num_ignored = 0; # commit (i.e., save, all records in the batch) - # FIXME biblio only at the moment SetImportBatchStatus('importing'); my $overlay_action = GetImportBatchOverlayAction($batch_id); my $nomatch_action = GetImportBatchNoMatchAction($batch_id); my $item_action = GetImportBatchItemAction($batch_id); + my $item_tag; + my $item_subfield; my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marc, encoding + my $sth = $dbh->prepare("SELECT import_records.import_record_id, record_type, status, overlay_status, marc, encoding FROM import_records - JOIN import_biblios USING (import_record_id) + LEFT JOIN import_auths ON (import_records.import_record_id=import_auths.import_record_id) + LEFT JOIN import_biblios ON (import_records.import_record_id=import_biblios.import_record_id) WHERE import_batch_id = ?"); $sth->execute($batch_id); my $rec_num = 0; while (my $rowref = $sth->fetchrow_hashref) { + $record_type = $rowref->{'record_type'}; $rec_num++; if ($progress_interval and (0 == ($rec_num % $progress_interval))) { &$progress_callback($rec_num); @@ -513,67 +558,87 @@ sub BatchCommitBibRecords { my $marc_record = MARC::Record->new_from_usmarc($rowref->{'marc'}); - # remove any item tags - rely on BatchCommitItems - my ($item_tag,$item_subfield) = &GetMarcFromKohaField("items.itemnumber",''); - foreach my $item_field ($marc_record->field($item_tag)) { - $marc_record->delete_field($item_field); + if ($record_type eq 'biblio') { + # remove any item tags - rely on BatchCommitItems + ($item_tag,$item_subfield) = &GetMarcFromKohaField("items.itemnumber",''); + foreach my $item_field ($marc_record->field($item_tag)) { + $marc_record->delete_field($item_field); + } } - # decide what what to do with the bib and item records - my ($bib_result, $item_result, $bib_match) = + my ($record_result, $item_result, $record_match) = _get_commit_action($overlay_action, $nomatch_action, $item_action, - $rowref->{'overlay_status'}, $rowref->{'import_record_id'}); + $rowref->{'overlay_status'}, $rowref->{'import_record_id'}, $record_type); - if ($bib_result eq 'create_new') { + my $recordid; + my $query; + if ($record_result eq 'create_new') { $num_added++; - my ($biblionumber, $biblioitemnumber) = AddBiblio($marc_record, $framework); - my $sth = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?"); - $sth->execute($biblionumber, $rowref->{'import_record_id'}); - $sth->finish(); - if ($item_result eq 'create_new') { - my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $biblionumber); - $num_items_added += $bib_items_added; - $num_items_errored += $bib_items_errored; + if ($record_type eq 'biblio') { + my $biblioitemnumber; + ($recordid, $biblioitemnumber) = AddBiblio($marc_record, $framework); + $query = "UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?"; + if ($item_result eq 'create_new') { + my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $recordid); + $num_items_added += $bib_items_added; + $num_items_errored += $bib_items_errored; + } + } else { + my $authid = AddAuthority($marc_record, undef, GuessAuthTypeCode($marc_record)); + $query = "UPDATE import_auths SET matched_authid = ? WHERE import_record_id = ?"; } + my $sth = $dbh->prepare_cached($query); + $sth->execute($recordid, $rowref->{'import_record_id'}); + $sth->finish(); SetImportRecordStatus($rowref->{'import_record_id'}, 'imported'); - } elsif ($bib_result eq 'replace') { + } elsif ($record_result eq 'replace') { $num_updated++; - my $biblionumber = $bib_match; - my $oldbiblio = GetBiblio($biblionumber); - my $oldxml = GetXmlBiblio($biblionumber); - - # remove item fields so that they don't get - # added again if record is reverted - my $old_marc = MARC::Record->new_from_xml(StripNonXmlChars($oldxml), 'UTF-8', $rowref->{'encoding'}); - foreach my $item_field ($old_marc->field($item_tag)) { - $old_marc->delete_field($item_field); - } + $recordid = $record_match; + my $oldxml; + if ($record_type eq 'biblio') { + my ($count, $oldbiblio) = GetBiblio($recordid); + $oldxml = GetXmlBiblio($recordid); + + # remove item fields so that they don't get + # added again if record is reverted + my $old_marc = MARC::Record->new_from_xml(StripNonXmlChars($oldxml), 'UTF-8', $rowref->{'encoding'}); + foreach my $item_field ($old_marc->field($item_tag)) { + $old_marc->delete_field($item_field); + } + $oldxml = $old_marc->as_xml(); + + ModBiblio($marc_record, $recordid, $oldbiblio->{'frameworkcode'}); + $query = "UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?"; + + if ($item_result eq 'create_new') { + my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $recordid); + $num_items_added += $bib_items_added; + $num_items_errored += $bib_items_errored; + } + } else { + my $oldxml = GetAuthorityXML($recordid); - ModBiblio($marc_record, $biblionumber, $oldbiblio->{'frameworkcode'}); + ModAuthority($recordid, $marc_record, GuessAuthTypeCode($marc_record)); + $query = "UPDATE import_auths SET matched_authid = ? WHERE import_record_id = ?"; + } my $sth = $dbh->prepare_cached("UPDATE import_records SET marcxml_old = ? WHERE import_record_id = ?"); - $sth->execute($old_marc->as_xml(), $rowref->{'import_record_id'}); + $sth->execute($oldxml, $rowref->{'import_record_id'}); $sth->finish(); - my $sth2 = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?"); - $sth2->execute($biblionumber, $rowref->{'import_record_id'}); + my $sth2 = $dbh->prepare_cached($query); + $sth2->execute($recordid, $rowref->{'import_record_id'}); $sth2->finish(); - if ($item_result eq 'create_new') { - my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $biblionumber); - $num_items_added += $bib_items_added; - $num_items_errored += $bib_items_errored; - } SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied'); SetImportRecordStatus($rowref->{'import_record_id'}, 'imported'); - } elsif ($bib_result eq 'ignore') { + } elsif ($record_result eq 'ignore') { $num_ignored++; - my $biblionumber = $bib_match; - if (defined $biblionumber and $item_result eq 'create_new') { - my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $biblionumber); + if ($record_type eq 'biblio' and defined $recordid and $item_result eq 'create_new') { + my ($bib_items_added, $bib_items_errored) = BatchCommitItems($rowref->{'import_record_id'}, $recordid); $num_items_added += $bib_items_added; $num_items_errored += $bib_items_errored; # still need to record the matched biblionumber so that the # items can be reverted my $sth2 = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = ? WHERE import_record_id = ?"); - $sth2->execute($biblionumber, $rowref->{'import_record_id'}); + $sth2->execute($recordid, $rowref->{'import_record_id'}); SetImportRecordOverlayStatus($rowref->{'import_record_id'}, 'match_applied'); } SetImportRecordStatus($rowref->{'import_record_id'}, 'ignored'); @@ -632,63 +697,84 @@ sub BatchCommitItems { return ($num_items_added, $num_items_errored); } -=head2 BatchRevertBibRecords +=head2 BatchRevertRecords my ($num_deleted, $num_errors, $num_reverted, $num_items_deleted, - $num_ignored) = BatchRevertBibRecords($batch_id); + $num_ignored) = BatchRevertRecords($batch_id); =cut -sub BatchRevertBibRecords { +sub BatchRevertRecords { my $batch_id = shift; + my $record_type; my $num_deleted = 0; my $num_errors = 0; my $num_reverted = 0; - my $num_items_deleted = 0; my $num_ignored = 0; + my $num_items_deleted = 0; # commit (i.e., save, all records in the batch) - # FIXME biblio only at the moment SetImportBatchStatus('reverting'); my $overlay_action = GetImportBatchOverlayAction($batch_id); my $nomatch_action = GetImportBatchNoMatchAction($batch_id); my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("SELECT import_record_id, status, overlay_status, marcxml_old, encoding, matched_biblionumber + my $sth = $dbh->prepare("SELECT import_records.import_record_id, record_type, status, overlay_status, marcxml_old, encoding, matched_biblionumber, matched_authid FROM import_records - JOIN import_biblios USING (import_record_id) + LEFT JOIN import_auths ON (import_records.import_record_id=import_auths.import_record_id) + LEFT JOIN import_biblios ON (import_records.import_record_id=import_biblios.import_record_id) WHERE import_batch_id = ?"); $sth->execute($batch_id); while (my $rowref = $sth->fetchrow_hashref) { + $record_type = $rowref->{'record_type'}; if ($rowref->{'status'} eq 'error' or $rowref->{'status'} eq 'reverted') { $num_ignored++; next; } - my $bib_result = _get_revert_action($overlay_action, $rowref->{'overlay_status'}, $rowref->{'status'}); + my $record_result = _get_revert_action($overlay_action, $rowref->{'overlay_status'}, $rowref->{'status'}); - if ($bib_result eq 'delete') { - $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); - my $error = DelBiblio($rowref->{'matched_biblionumber'}); + if ($record_result eq 'delete') { + my $error = undef; + if ($record_type eq 'biblio') { + $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); + $error = DelBiblio($rowref->{'matched_biblionumber'}); + } else { + my $deletedauthid = DelAuthority($rowref->{'matched_authid'}); + } if (defined $error) { $num_errors++; } else { $num_deleted++; SetImportRecordStatus($rowref->{'import_record_id'}, 'reverted'); } - } elsif ($bib_result eq 'restore') { + } elsif ($record_result eq 'restore') { $num_reverted++; my $old_record = MARC::Record->new_from_xml(StripNonXmlChars($rowref->{'marcxml_old'}), 'UTF-8', $rowref->{'encoding'}); - my $biblionumber = $rowref->{'matched_biblionumber'}; - my $oldbiblio = GetBiblio($biblionumber); - $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); - ModBiblio($old_record, $biblionumber, $oldbiblio->{'frameworkcode'}); + if ($record_type eq 'biblio') { + my $biblionumber = $rowref->{'matched_biblionumber'}; + my ($count, $oldbiblio) = GetBiblio($biblionumber); + $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); + ModBiblio($old_record, $biblionumber, $oldbiblio->{'frameworkcode'}); + } else { + my $authid = $rowref->{'matched_authid'}; + ModAuthority($authid, $old_record, GuessAuthTypeCode($old_record)); + } SetImportRecordStatus($rowref->{'import_record_id'}, 'reverted'); - } elsif ($bib_result eq 'ignore') { - $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); + } elsif ($record_result eq 'ignore') { + if ($record_type eq 'biblio') { + $num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'}); + } SetImportRecordStatus($rowref->{'import_record_id'}, 'reverted'); } - # remove matched_biblionumber only if there is no 'imported' item left - my $sth2 = $dbh->prepare_cached("UPDATE import_biblios SET matched_biblionumber = NULL WHERE import_record_id = ? AND NOT EXISTS (SELECT * FROM import_items WHERE import_items.import_record_id=import_biblios.import_record_id and status='imported')" ); + my $query; + if ($record_type eq 'biblio') { + # remove matched_biblionumber only if there is no 'imported' item left + $query = "UPDATE import_biblios SET matched_biblionumber = NULL WHERE import_record_id = ?"; + $query = "UPDATE import_biblios SET matched_biblionumber = NULL WHERE import_record_id = ? AND NOT EXISTS (SELECT * FROM import_items WHERE import_items.import_record_id=import_biblios.import_record_id and status='imported')"; + } else { + $query = "UPDATE import_auths SET matched_authid = NULL WHERE import_record_id = ?"; + } + my $sth2 = $dbh->prepare_cached($query); $sth2->execute($rowref->{'import_record_id'}); } @@ -862,24 +948,26 @@ sub GetNumberOfNonZ3950ImportBatches { return $count; } -=head2 GetImportBibliosRange +=head2 GetImportRecordsRange - my $results = GetImportBibliosRange($batch_id, $offset, $results_per_group); + my $results = GetImportRecordsRange($batch_id, $offset, $results_per_group); Returns a reference to an array of hash references corresponding to -import_biblios/import_records rows for a given batch +import_biblios/import_auths/import_records rows for a given batch starting at the given offset. =cut -sub GetImportBibliosRange { +sub GetImportRecordsRange { my ($batch_id, $offset, $results_per_group, $status) = @_; my $dbh = C4::Context->dbh; - my $query = "SELECT title, author, isbn, issn, import_record_id, record_sequence, - status, overlay_status, matched_biblionumber + my $query = "SELECT title, author, isbn, issn, authorized_heading, import_records.import_record_id, + record_sequence, status, overlay_status, + matched_biblionumber, matched_authid, record_type FROM import_records - JOIN import_biblios USING (import_record_id) + LEFT JOIN import_auths ON (import_records.import_record_id=import_auths.import_record_id) + LEFT JOIN import_biblios ON (import_records.import_record_id=import_biblios.import_record_id) WHERE import_batch_id = ?"; my @params; push(@params, $batch_id); @@ -1181,16 +1269,21 @@ sub GetImportRecordMatches { my $dbh = C4::Context->dbh; # FIXME currently biblio only - my $sth = $dbh->prepare_cached("SELECT title, author, biblionumber, score + my $sth = $dbh->prepare_cached("SELECT title, author, biblionumber, + candidate_match_id, score, record_type FROM import_records JOIN import_record_matches USING (import_record_id) - JOIN biblio ON (biblionumber = candidate_match_id) + LEFT JOIN biblio ON (biblionumber = candidate_match_id) WHERE import_record_id = ? ORDER BY score DESC, biblionumber DESC"); $sth->bind_param(1, $import_record_id); my $results = []; $sth->execute(); while (my $row = $sth->fetchrow_hashref) { + if ($row->{'record_type'} eq 'auth') { + $row->{'authorized_heading'} = GetAuthorizedHeading( { authid => $row->{'candidate_match_id'} } ); + } + next if ($row->{'record_type'} eq 'biblio' && not $row->{'biblionumber'}); push @$results, $row; last if $best_only; } @@ -1250,6 +1343,20 @@ sub _update_import_record_marc { $sth->finish(); } +sub _add_auth_fields { + my ($import_record_id, $marc_record) = @_; + + my $controlnumber; + if ($marc_record->field('001')) { + $controlnumber = $marc_record->field('001')->data(); + } + my $authorized_heading = GetAuthorizedHeading($marc_record); + my $dbh = C4::Context->dbh; + my $sth = $dbh->prepare("INSERT INTO import_auths (import_record_id, controlnumber, authorized_heading) VALUES (?, ?, ?)"); + $sth->execute($import_record_id, $controlnumber, $authorized_heading); + $sth->finish(); +} + sub _add_biblio_fields { my ($import_record_id, $marc_record) = @_; @@ -1293,7 +1400,7 @@ sub _update_batch_record_counts { my $dbh = C4::Context->dbh; my $sth = $dbh->prepare_cached("UPDATE import_batches SET - num_biblios = ( + num_records = ( SELECT COUNT(*) FROM import_records WHERE import_batch_id = import_batches.import_batch_id @@ -1311,26 +1418,45 @@ sub _update_batch_record_counts { } sub _get_commit_action { - my ($overlay_action, $nomatch_action, $item_action, $overlay_status, $import_record_id) = @_; + my ($overlay_action, $nomatch_action, $item_action, $overlay_status, $import_record_id, $record_type) = @_; - my ($bib_result, $bib_match, $item_result); - - if ($overlay_status ne 'no_match') { - $bib_match = GetBestRecordMatch($import_record_id); - if ($overlay_action eq 'replace') { - $bib_result = defined($bib_match) ? 'replace' : 'create_new'; - } elsif ($overlay_action eq 'create_new') { - $bib_result = 'create_new'; - } elsif ($overlay_action eq 'ignore') { - $bib_result = 'ignore'; - } - $item_result = ($item_action eq 'always_add' or $item_action eq 'add_only_for_matches') ? 'create_new' : 'ignore'; - } else { - $bib_result = $nomatch_action; - $item_result = ($item_action eq 'always_add' or $item_action eq 'add_only_for_new') ? 'create_new' : 'ignore'; - } + if ($record_type eq 'biblio') { + my ($bib_result, $bib_match, $item_result); + + if ($overlay_status ne 'no_match') { + $bib_match = GetBestRecordMatch($import_record_id); + if ($overlay_action eq 'replace') { + $bib_result = defined($bib_match) ? 'replace' : 'create_new'; + } elsif ($overlay_action eq 'create_new') { + $bib_result = 'create_new'; + } elsif ($overlay_action eq 'ignore') { + $bib_result = 'ignore'; + } + $item_result = ($item_action eq 'always_add' or $item_action eq 'add_only_for_matches') ? 'create_new' : 'ignore'; + } else { + $bib_result = $nomatch_action; + $item_result = ($item_action eq 'always_add' or $item_action eq 'add_only_for_new') ? 'create_new' : 'ignore'; + } + return ($bib_result, $item_result, $bib_match); + } else { # must be auths + my ($auth_result, $auth_match); + + if ($overlay_status ne 'no_match') { + $auth_match = GetBestRecordMatch($import_record_id); + if ($overlay_action eq 'replace') { + $auth_result = defined($auth_match) ? 'replace' : 'create_new'; + } elsif ($overlay_action eq 'create_new') { + $auth_result = 'create_new'; + } elsif ($overlay_action eq 'ignore') { + $auth_result = 'ignore'; + } + } else { + $auth_result = $nomatch_action; + } - return ($bib_result, $item_result, $bib_match); + return ($auth_result, undef, $auth_match); + + } } sub _get_revert_action { diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 59b5876c2a..c0634ec3a3 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -1,6 +1,6 @@ package C4::Matcher; -# Copyright (C) 2007 LibLime +# Copyright (C) 2007 LibLime, 2012 C & P Bibliography Services # # This file is part of Koha. # @@ -22,8 +22,6 @@ use warnings; use C4::Context; use MARC::Record; -use C4::Search; -use C4::Biblio; use vars qw($VERSION); @@ -384,6 +382,20 @@ sub delete { $sth->execute($matcher_id); # relying on cascading deletes to clean up everything } +=head2 record_type + + $matcher->record_type('biblio'); + my $record_type = $matcher->record_type(); + +Accessor method. + +=cut + +sub record_type { + my $self = shift; + @_ ? $self->{'record_type'} = shift : $self->{'record_type'}; +} + =head2 threshold $matcher->threshold(1000); @@ -582,7 +594,7 @@ sub add_simple_required_check { ); } -=head2 find_matches +=head2 get_matches my @matches = $matcher->get_matches($marc_record, $max_matches); foreach $match (@matches) { @@ -618,9 +630,37 @@ sub get_matches { my @source_keys = _get_match_keys($source_record, $matchpoint); next if scalar(@source_keys) == 0; # build query - my $query = join(" or ", map { "$matchpoint->{'index'}=$_" } @source_keys); - # FIXME only searching biblio index at the moment - my ($error, $searchresults, $total_hits) = SimpleSearch($query, 0, $max_matches); + my $query; + my $error; + my $searchresults; + my $total_hits; + if ($self->{'record_type'} eq 'biblio') { + $query = join(" or ", map { "$matchpoint->{'index'}=$_" } @source_keys); +# FIXME only searching biblio index at the moment + require C4::Search; + ($error, $searchresults, $total_hits) = C4::Search::SimpleSearch($query, 0, $max_matches); + } elsif ($self->{'record_type'} eq 'authority') { + my $authresults; + my @marclist; + my @and_or; + my @excluding = []; + my @operator; + my @value; + foreach my $key (@source_keys) { + push @marclist, $matchpoint->{'index'}; + push @and_or, 'or'; + push @operator, 'exact'; + push @value, $key; + } + require C4::AuthoritiesMarc; + ($authresults, $total_hits) = C4::AuthoritiesMarc::SearchAuthorities( + \@marclist, \@and_or, \@excluding, \@operator, + \@value, 0, 20, undef, 'AuthidAsc', 1 + ); + foreach my $result (@$authresults) { + push @$searchresults, $result->{'authid'}; + } + } if (defined $error ) { warn "search failed ($query) $error"; @@ -636,16 +676,23 @@ sub get_matches { # get rid of any that don't meet the required checks %matches = map { _passes_required_checks($source_record, $_, $self->{'required_checks'}) ? ($_ => $matches{$_}) : () } - keys %matches; + keys %matches unless ($self->{'record_type'} eq 'auth'); my @results = (); - foreach my $marcblob (keys %matches) { - my $target_record = MARC::Record->new_from_usmarc($marcblob); - my $result = TransformMarcToKoha(C4::Context->dbh, $target_record, ''); - # FIXME - again, bibliospecific - # also, can search engine be induced to give just the number in the first place? - my $record_number = $result->{'biblionumber'}; - push @results, { 'record_id' => $record_number, 'score' => $matches{$marcblob} }; + if ($self->{'record_type'} eq 'biblio') { + require C4::Biblio; + foreach my $marcblob (keys %matches) { + my $target_record = MARC::Record->new_from_usmarc($marcblob); + my $record_number; + my $result = C4::Biblio::TransformMarcToKoha(C4::Context->dbh, $target_record, ''); + $record_number = $result->{'biblionumber'}; + push @results, { 'record_id' => $record_number, 'score' => $matches{$marcblob} }; + } + } elsif ($self->{'record_type'} eq 'authority') { + require C4::AuthoritiesMarc; + foreach my $authid (keys %matches) { + push @results, { 'record_id' => $authid, 'score' => $matches{$authid} }; + } } @results = sort { $b->{'score'} cmp $a->{'score'} } @results; if (scalar(@results) > $max_matches) { @@ -673,6 +720,7 @@ sub dump { $result->{'matcher_id'} = $self->{'id'}; $result->{'code'} = $self->{'code'}; $result->{'description'} = $self->{'description'}; + $result->{'record_type'} = $self->{'record_type'}; $result->{'matchpoints'} = []; foreach my $matchpoint (@{ $self->{'matchpoints'} }) { diff --git a/acqui/addorderiso2709.pl b/acqui/addorderiso2709.pl index 22001f1e67..3dc83335c1 100755 --- a/acqui/addorderiso2709.pl +++ b/acqui/addorderiso2709.pl @@ -156,7 +156,7 @@ if ($op eq ""){ # retrieve the file you want to import my $import_batch_id = $cgiparams->{'import_batch_id'}; - my $biblios = GetImportBibliosRange($import_batch_id); + my $biblios = GetImportRecordsRange($import_batch_id); for my $biblio (@$biblios){ # 1st insert the biblio, or find it through matcher my ( $marcblob, $encoding ) = GetImportRecordMarc( $biblio->{'import_record_id'} ); @@ -330,7 +330,7 @@ sub import_batches_list { foreach my $batch (@$batches) { if ($batch->{'import_status'} eq "staged") { # check if there is at least 1 line still staged - my $stagedList=GetImportBibliosRange($batch->{'import_batch_id'}, undef, undef, 'staged'); + my $stagedList=GetImportRecordsRange($batch->{'import_batch_id'}, undef, undef, 'staged'); if (scalar @$stagedList) { my ($staged_date, $staged_hour) = split (/ /, $batch->{'upload_timestamp'}); push @list, { @@ -357,7 +357,7 @@ sub import_batches_list { sub import_biblios_list { my ($template, $import_batch_id) = @_; my $batch = GetImportBatch($import_batch_id,'staged'); - my $biblios = GetImportBibliosRange($import_batch_id,'','','staged'); + my $biblios = GetImportRecordsRange($import_batch_id,'','','staged'); my @list = (); foreach my $biblio (@$biblios) { diff --git a/admin/matching-rules.pl b/admin/matching-rules.pl index 2fa2ac6429..ea86c480fb 100755 --- a/admin/matching-rules.pl +++ b/admin/matching-rules.pl @@ -92,9 +92,10 @@ sub add_matching_rule_form { sub add_update_matching_rule { my $template = shift; my $matcher_id = shift; + my $record_type = $input->param('record_type') || 'biblio'; # do parsing - my $matcher = C4::Matcher->new('biblio', 1000); # FIXME biblio only for now + my $matcher = C4::Matcher->new($record_type, 1000); $matcher->code($input->param('code')); $matcher->description($input->param('description')); $matcher->threshold($input->param('threshold')); @@ -203,10 +204,11 @@ sub edit_matching_rule_form { my $matcher = C4::Matcher->fetch($matcher_id); - $template->param(matcher_id => $matcher_id); - $template->param(code => $matcher->code()); - $template->param(description => $matcher->description()); - $template->param(threshold => $matcher->threshold()); + $template->{VARS}->{'matcher_id'} = $matcher_id; + $template->{VARS}->{'code'} = $matcher->code(); + $template->{VARS}->{'description'} = $matcher->description(); + $template->{VARS}->{'threshold'} = $matcher->threshold(); + $template->{VARS}->{'record_type'} = $matcher->record_type(); my $matcher_info = $matcher->dump(); my @matchpoints = (); diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt index 58352447ef..2bd8aafc5b 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/matching-rules.tt @@ -220,6 +220,17 @@ function CheckRuleForm(f) { Required +
  • + Required +
  • diff --git a/misc/cronjobs/import_webservice_batch.pl b/misc/cronjobs/import_webservice_batch.pl index d1b30ef94a..3f57779b0a 100755 --- a/misc/cronjobs/import_webservice_batch.pl +++ b/misc/cronjobs/import_webservice_batch.pl @@ -54,4 +54,4 @@ EOF my $batch_ids = GetStagedWebserviceBatches() or exit; $framework ||= ''; -BatchCommitBibRecords($_, $framework) foreach @$batch_ids; +BatchCommitRecords($_, $framework) foreach @$batch_ids; diff --git a/svc/import_bib b/svc/import_bib index a029a58c84..3e43339fde 100755 --- a/svc/import_bib +++ b/svc/import_bib @@ -89,7 +89,7 @@ sub import_bib { my $number_of_matches = BatchFindBibDuplicates($batch_id, $matcher); # XXX we are ignoring the result of this; - BatchCommitBibRecords($batch_id, $framework) if lc($import_mode) eq 'direct'; + BatchCommitRecords($batch_id, $framework) if lc($import_mode) eq 'direct'; my $dbh = C4::Context->dbh(); my $sth = $dbh->prepare("SELECT matched_biblionumber FROM import_biblios WHERE import_record_id =?"); diff --git a/tools/manage-marc-import.pl b/tools/manage-marc-import.pl index 03f5bb98e3..de2cbf6ba5 100755 --- a/tools/manage-marc-import.pl +++ b/tools/manage-marc-import.pl @@ -99,7 +99,7 @@ if ($op eq "") { if ($import_batch_id eq '') { import_batches_list($template, $offset, $results_per_page); } else { - import_biblios_list($template, $import_batch_id, $offset, $results_per_page); + import_records_list($template, $import_batch_id, $offset, $results_per_page); } } elsif ($op eq "commit-batch") { if ($completedJobID) { @@ -108,14 +108,14 @@ if ($op eq "") { my $framework = $input->param('framework'); commit_batch($template, $import_batch_id, $framework); } - import_biblios_list($template, $import_batch_id, $offset, $results_per_page); + import_records_list($template, $import_batch_id, $offset, $results_per_page); } elsif ($op eq "revert-batch") { if ($completedJobID) { add_saved_job_results_to_template($template, $completedJobID); } else { revert_batch($template, $import_batch_id); } - import_biblios_list($template, $import_batch_id, $offset, $results_per_page); + import_records_list($template, $import_batch_id, $offset, $results_per_page); } elsif ($op eq "clean-batch") { CleanBatch($import_batch_id); import_batches_list($template, $offset, $results_per_page); @@ -131,7 +131,7 @@ if ($op eq "") { my $item_action = $input->param('item_action'); redo_matching($template, $import_batch_id, $new_matcher_id, $current_matcher_id, $overlay_action, $nomatch_action, $item_action); - import_biblios_list($template, $import_batch_id, $offset, $results_per_page); + import_records_list($template, $import_batch_id, $offset, $results_per_page); } output_html_with_http_headers $input, $cookie, $template->output; @@ -163,21 +163,17 @@ sub redo_matching { $template->param('changed_item_action' => 1); } - if ($new_matcher_id eq $current_matcher_id) { - return; - } - my $num_with_matches = 0; if (defined $new_matcher_id and $new_matcher_id ne "") { my $matcher = C4::Matcher->fetch($new_matcher_id); if (defined $matcher) { - $num_with_matches = BatchFindBibDuplicates($import_batch_id, $matcher); + $num_with_matches = BatchFindDuplicates($import_batch_id, $matcher); SetImportBatchMatcher($import_batch_id, $new_matcher_id); } else { $rematch_failed = 1; } } else { - $num_with_matches = BatchFindBibDuplicates($import_batch_id, undef); + $num_with_matches = BatchFindDuplicates($import_batch_id, undef); SetImportBatchMatcher($import_batch_id, undef); SetImportBatchOverlayAction('create_new'); } @@ -214,13 +210,14 @@ sub import_batches_list { foreach my $batch (@$batches) { push @list, { import_batch_id => $batch->{'import_batch_id'}, - num_biblios => $batch->{'num_biblios'}, + num_records => $batch->{'num_records'}, num_items => $batch->{'num_items'}, upload_timestamp => $batch->{'upload_timestamp'}, import_status => $batch->{'import_status'}, file_name => $batch->{'file_name'} || "($batch->{'batch_type'})", comments => $batch->{'comments'}, can_clean => ($batch->{'import_status'} ne 'cleaned') ? 1 : 0, + record_type => $batch->{'record_type'}, }; } $template->param(batch_list => \@list); @@ -244,7 +241,7 @@ sub commit_batch { $callback = progress_callback($job, $dbh); } my ($num_added, $num_updated, $num_items_added, $num_items_errored, $num_ignored) = - BatchCommitBibRecords($import_batch_id, $framework, 50, $callback); + BatchCommitRecords($import_batch_id, $framework, 50, $callback); $dbh->commit(); my $results = { @@ -273,7 +270,7 @@ sub revert_batch { $callback = progress_callback($job, $dbh); } my ($num_deleted, $num_errors, $num_reverted, $num_items_deleted, $num_ignored) = - BatchRevertBibRecords($import_batch_id, 50, $callback); + BatchRevertRecords($import_batch_id, 50, $callback); $dbh->commit(); my $results = { @@ -295,7 +292,7 @@ sub put_in_background { my $import_batch_id = shift; my $batch = GetImportBatch($import_batch_id); - my $job = C4::BackgroundJob->new($sessionID, $batch->{'file_name'}, $ENV{'SCRIPT_NAME'}, $batch->{'num_biblios'}); + my $job = C4::BackgroundJob->new($sessionID, $batch->{'file_name'}, $ENV{'SCRIPT_NAME'}, $batch->{'num_records'}); my $jobID = $job->id(); # fork off @@ -350,46 +347,53 @@ sub add_saved_job_results_to_template { add_results_to_template($template, $results); } -sub import_biblios_list { +sub import_records_list { my ($template, $import_batch_id, $offset, $results_per_page) = @_; my $batch = GetImportBatch($import_batch_id); - my $biblios = GetImportBibliosRange($import_batch_id, $offset, $results_per_page); + my $records = GetImportRecordsRange($import_batch_id, $offset, $results_per_page); my @list = (); - foreach my $biblio (@$biblios) { - my $citation = $biblio->{'title'}; - $citation .= " $biblio->{'author'}" if $biblio->{'author'}; - $citation .= " (" if $biblio->{'issn'} or $biblio->{'isbn'}; - $citation .= $biblio->{'isbn'} if $biblio->{'isbn'}; - $citation .= ", " if $biblio->{'issn'} and $biblio->{'isbn'}; - $citation .= $biblio->{'issn'} if $biblio->{'issn'}; - $citation .= ")" if $biblio->{'issn'} or $biblio->{'isbn'}; - - my $match = GetImportRecordMatches($biblio->{'import_record_id'}, 1); + foreach my $record (@$records) { + my $citation = $record->{'title'} || $record->{'authorized_heading'}; + $citation .= " $record->{'author'}" if $record->{'author'}; + $citation .= " (" if $record->{'issn'} or $record->{'isbn'}; + $citation .= $record->{'isbn'} if $record->{'isbn'}; + $citation .= ", " if $record->{'issn'} and $record->{'isbn'}; + $citation .= $record->{'issn'} if $record->{'issn'}; + $citation .= ")" if $record->{'issn'} or $record->{'isbn'}; + + my $match = GetImportRecordMatches($record->{'import_record_id'}, 1); my $match_citation = ''; if ($#$match > -1) { - $match_citation .= $match->[0]->{'title'} if defined($match->[0]->{'title'}); - $match_citation .= ' ' . $match->[0]->{'author'} if defined($match->[0]->{'author'}); + if ($match->[0]->{'record_type'} eq 'biblio') { + $match_citation .= $match->[0]->{'title'} if defined($match->[0]->{'title'}); + $match_citation .= ' ' . $match->[0]->{'author'} if defined($match->[0]->{'author'}); + } elsif ($match->[0]->{'record_type'} eq 'auth') { + $match_citation .= $match->[0]->{'authorized_heading'} if defined($match->[0]->{'authorized_heading'}); + } } push @list, - { import_record_id => $biblio->{'import_record_id'}, - final_match_biblionumber => $biblio->{'matched_biblionumber'}, + { import_record_id => $record->{'import_record_id'}, + final_match_id => $record->{'matched_biblionumber'} || $record->{'matched_authid'}, citation => $citation, - status => $biblio->{'status'}, - record_sequence => $biblio->{'record_sequence'}, - overlay_status => $biblio->{'overlay_status'}, - match_biblionumber => $#$match > -1 ? $match->[0]->{'biblionumber'} : 0, + status => $record->{'status'}, + record_sequence => $record->{'record_sequence'}, + overlay_status => $record->{'overlay_status'}, + # Sorry about the match_id being from the "biblionumber" field; + # as it turns out, any match id will go in biblionumber + match_id => $#$match > -1 ? $match->[0]->{'biblionumber'} : 0, match_citation => $match_citation, match_score => $#$match > -1 ? $match->[0]->{'score'} : 0, + record_type => $record->{'record_type'}, }; } - my $num_biblios = $batch->{'num_biblios'}; - $template->param(biblio_list => \@list); - add_page_numbers($template, $offset, $results_per_page, $num_biblios); + my $num_records = $batch->{'num_records'}; + $template->param(record_list => \@list); + add_page_numbers($template, $offset, $results_per_page, $num_records); $template->param(offset => $offset); $template->param(range_top => $offset + $results_per_page - 1); - $template->param(num_results => $num_biblios); + $template->param(num_results => $num_records); $template->param(results_per_page => $results_per_page); $template->param(import_batch_id => $import_batch_id); my $overlay_action = GetImportBatchOverlayAction($import_batch_id); @@ -412,12 +416,13 @@ sub batch_info { $template->param(comments => $batch->{'comments'}); $template->param(import_status => $batch->{'import_status'}); $template->param(upload_timestamp => $batch->{'upload_timestamp'}); - $template->param(num_biblios => $batch->{'num_biblios'}); - $template->param(num_items => $batch->{'num_biblios'}); + $template->{VARS}->{'record_type'} = $batch->{'record_type'}; + $template->param(num_records => $batch->{'num_records'}); + $template->param(num_items => $batch->{'num_items'}); if ($batch->{'import_status'} ne 'cleaned') { $template->param(can_clean => 1); } - if ($batch->{'num_biblios'} > 0) { + if ($batch->{'num_records'} > 0) { if ($batch->{'import_status'} eq 'staged' or $batch->{'import_status'} eq 'reverted') { $template->param(can_commit => 1); } diff --git a/tools/stage-marc-import.pl b/tools/stage-marc-import.pl index 0de73d46ec..ef3ade61ba 100755 --- a/tools/stage-marc-import.pl +++ b/tools/stage-marc-import.pl @@ -130,7 +130,7 @@ if ($completedJobID) { } # FIXME branch code - my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($encoding, $marcrecord, $filename, $comments, '', $parse_items, 0, 50, staging_progress_callback($job, $dbh)); + my ($batch_id, $num_valid, $num_items, @import_errors) = BatchStageMarcRecords($record_type, $encoding, $marcrecord, $filename, $comments, '', $parse_items, 0, 50, staging_progress_callback($job, $dbh)); $dbh->commit(); @@ -143,7 +143,7 @@ if ($completedJobID) { if (defined $matcher) { $checked_matches = 1; $matcher_code = $matcher->code(); - $num_with_matches = BatchFindBibDuplicates($batch_id, $matcher, + $num_with_matches = BatchFindDuplicates($batch_id, $matcher, 10, 50, matching_progress_callback($job, $dbh)); SetImportBatchMatcher($batch_id, $matcher_id); SetImportBatchOverlayAction($batch_id, $overlay_action); -- 2.39.5