From 2708db52778944f41b36379cc5f202a8ddea6a5a Mon Sep 17 00:00:00 2001 From: Jared Camins-Esakov Date: Sat, 11 May 2013 10:40:09 -0400 Subject: [PATCH 1/3] Bug 10230: no need to use SimpleSearch for matching auths When introducing QueryParser, I introduced a check for QueryParser at too high a level, causing authority matching to try and use SimpleSearch for authorities prematurely, when SearchAuthorities should be handling it. This patch corrects the level of the check. This patch only moves three lines, but thanks to the change in if level, it adjusts the indentation quite a bit. Signed-off-by: Katrin Fischer Comments on third patch of this series. Signed-off-by: Jared Camins-Esakov --- C4/Matcher.pm | 75 ++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 34 deletions(-) diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 6a7c4f2226..4e064be143 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -628,51 +628,58 @@ sub get_matches { my $QParser; $QParser = C4::Context->queryparser if (C4::Context->preference('UseQueryParser')); - foreach my $matchpoint (@{ $self->{'matchpoints'} }) { - my @source_keys = _get_match_keys($source_record, $matchpoint); + foreach my $matchpoint ( @{ $self->{'matchpoints'} } ) { + my @source_keys = _get_match_keys( $source_record, $matchpoint ); next if scalar(@source_keys) == 0; + # build query my $query; my $error; my $searchresults; my $total_hits; - if ($QParser) { - $query = join(" || ", map { "$matchpoint->{'index'}:$_" } @source_keys); + if ( $self->{'record_type'} eq 'biblio' ) { + if ($QParser) { + $query = join( " || ", + map { "$matchpoint->{'index'}:$_" } @source_keys ); + } + else { + $query = join( " or ", + map { "$matchpoint->{'index'}=$_" } @source_keys ); + } require C4::Search; - ($error, $searchresults, $total_hits) = C4::Search::SimpleSearch($query, 0, $max_matches, [ $self->{'record_type'} . 
'server' ] ); - } else { - if ($self->{'record_type'} eq 'biblio') { - $query = join(" or ", map { "$matchpoint->{'index'}=$_" } @source_keys); - require C4::Search; - ($error, $searchresults, $total_hits) = C4::Search::SimpleSearch($query, 0, $max_matches); - } elsif ($self->{'record_type'} eq 'authority') { - my $authresults; - my @marclist; - my @and_or; - my @excluding = []; - my @operator; - my @value; - foreach my $key (@source_keys) { - push @marclist, $matchpoint->{'index'}; - push @and_or, 'or'; - push @operator, 'exact'; - push @value, $key; - } - require C4::AuthoritiesMarc; - ($authresults, $total_hits) = C4::AuthoritiesMarc::SearchAuthorities( - \@marclist, \@and_or, \@excluding, \@operator, - \@value, 0, 20, undef, 'AuthidAsc', 1 - ); - foreach my $result (@$authresults) { - push @$searchresults, $result->{'authid'}; - } + ( $error, $searchresults, $total_hits ) = + C4::Search::SimpleSearch( $query, 0, $max_matches ); + } + elsif ( $self->{'record_type'} eq 'authority' ) { + my $authresults; + my @marclist; + my @and_or; + my @excluding = []; + my @operator; + my @value; + foreach my $key (@source_keys) { + push @marclist, $matchpoint->{'index'}; + push @and_or, 'or'; + push @operator, 'exact'; + push @value, $key; + } + require C4::AuthoritiesMarc; + ( $authresults, $total_hits ) = + C4::AuthoritiesMarc::SearchAuthorities( + \@marclist, \@and_or, \@excluding, \@operator, + \@value, 0, 20, undef, + 'AuthidAsc', 1 + ); + foreach my $result (@$authresults) { + push @$searchresults, $result->{'authid'}; } } - if (defined $error ) { + if ( defined $error ) { warn "search failed ($query) $error"; - } else { - foreach my $matched (@{$searchresults}) { + } + else { + foreach my $matched ( @{$searchresults} ) { $matches{$matched} += $matchpoint->{'score'}; } } From 4f9174349c99574df0933f2b660938998580d8ba Mon Sep 17 00:00:00 2001 From: Jared Camins-Esakov Date: Sat, 11 May 2013 15:19:58 -0400 Subject: [PATCH 2/3] Bug 10230: Don't limit valid matches to bibs 
The patch for bug 9523 added a JOIN to the biblio table when identifying the best match so that if a matched record had been deleted it would not hold up the import process. Unfortunately, this broke all authority matching, since of course authorities don't appear in the biblio table. This patch adds a join to auth_header as well, and decides which to check based on the record type. Signed-off-by: Katrin Fischer Comment on third patch of this series. Signed-off-by: Jared Camins-Esakov --- C4/ImportBatch.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm index a5befe69da..3ce5c67e59 100644 --- a/C4/ImportBatch.pm +++ b/C4/ImportBatch.pm @@ -1036,8 +1036,12 @@ sub GetBestRecordMatch { my $dbh = C4::Context->dbh; my $sth = $dbh->prepare("SELECT candidate_match_id FROM import_record_matches - JOIN biblio ON ( candidate_match_id = biblionumber ) - WHERE import_record_id = ? + JOIN import_records ON ( import_record_matches.import_record_id = import_records.import_record_id ) + LEFT JOIN biblio ON ( candidate_match_id = biblio.biblionumber ) + LEFT JOIN auth_header ON ( candidate_match_id = auth_header.authid ) + WHERE import_record_matches.import_record_id = ? AND + ( (import_records.record_type = 'biblio' AND biblio.biblionumber IS NOT NULL) OR + (import_records.record_type = 'auth' AND auth_header.authid IS NOT NULL) ) ORDER BY score DESC, candidate_match_id DESC"); $sth->execute($import_record_id); my ($record_id) = $sth->fetchrow_array(); From a278407a476c67c334e1e76fc72c1871a7328af5 Mon Sep 17 00:00:00 2001 From: Jared Camins-Esakov Date: Sat, 11 May 2013 16:07:43 -0400 Subject: [PATCH 3/3] Bug 10230: show correct matching record The correct matching record was not always shown on the manage staged MARC page. This patch corrects the issue and provides the template with the appropriate ID for the matched record regardless of what type of record it is. To test: 1) Create a matching rule for authorities. 
For MARC21, the following is a good choice: Matching rule code: AUTHPER Description: Personal name main entry Match threshold: 999 Record type: Authority record [Match point 1:] Search index: mainmainentry Score: 1000 Tag: 100 Subfields: a 2) Create a record that has the appropriate fields for being matched with that rule (if you don't already have one). 3) Save the authority record as MARC (Unicode/UTF-8). 4) Stage the file choosing your new matching rule. 5) Note that with these patches, you get a link to the existing authority and without them you could get any number of strange things. Signed-off-by: Katrin Fischer Passes all tests and QA script. Additional tests done: - staged bibliographic records and matched with different rules and actions. Undid the import. - staged authority records and matched with the example rule. Undid the import. Signed-off-by: Jared Camins-Esakov --- tools/manage-marc-import.pl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/manage-marc-import.pl b/tools/manage-marc-import.pl index 32d3645ab5..25f7769c1c 100755 --- a/tools/manage-marc-import.pl +++ b/tools/manage-marc-import.pl @@ -365,12 +365,17 @@ sub import_records_list { my $match = GetImportRecordMatches($record->{'import_record_id'}, 1); my $match_citation = ''; + my $match_id; if ($#$match > -1) { if ($match->[0]->{'record_type'} eq 'biblio') { $match_citation .= $match->[0]->{'title'} if defined($match->[0]->{'title'}); $match_citation .= ' ' . 
$match->[0]->{'author'} if defined($match->[0]->{'author'}); + $match_id = $match->[0]->{'biblionumber'}; } elsif ($match->[0]->{'record_type'} eq 'auth') { - $match_citation .= $match->[0]->{'authorized_heading'} if defined($match->[0]->{'authorized_heading'}); + if (defined($match->[0]->{'authorized_heading'})) { + $match_citation .= $match->[0]->{'authorized_heading'}; + $match_id = $match->[0]->{'candidate_match_id'}; + } } } @@ -383,7 +388,7 @@ sub import_records_list { overlay_status => $record->{'overlay_status'}, # Sorry about the match_id being from the "biblionumber" field; # as it turns out, any match id will go in biblionumber - match_id => $#$match > -1 ? $match->[0]->{'biblionumber'} : 0, + match_id => $match_id, match_citation => $match_citation, match_score => $#$match > -1 ? $match->[0]->{'score'} : 0, record_type => $record->{'record_type'},