From 50f402405591ceb77c510f7cacd62749ac9e3b59 Mon Sep 17 00:00:00 2001 From: David Cook Date: Tue, 19 Jan 2016 16:37:09 +1100 Subject: [PATCH] Bug 15541 - Prevent normalization during matching/import process This patch allows you to use the "qualifier,qualifier" syntax in the Record Matching Rules "Search Index" when using the QueryParser. While QueryParser doesn't support this syntax, it will now fall back correctly to non-QueryParser functionality. Without the patch, your search will just fail silently. This patch also adds a "skip_normalize" option to C4::Search::SimpleSearch(), and uses the option during C4::Matcher::get_matches(). This prevents the s/:/=/g and s/=/:/g normalization. This normalization is heavy-handed, and while it is sometimes necessary to generate a valid CCL query or QueryParser query, C4::Matcher::get_matches() already produces a valid CCL query, so we don't need to do this normalization. Additionally, this normalization causes problems when you use a Zebra register which isn't normalized: namely the "u" register. Strings are stored "as is", so http://localhost/koha/resource/1 is stored as is during indexing. When you search, you need to pass the exact same thing through the query to get a match. Using http=//localhost/koha/resource/1 in your query will yield zero results. 
_TEST PLAN_ 0) Apply patch 1) Create a Record Matching Rule in Koha with the following details: Matching rule code: TEST Description: Test Match threshold: 100 Record type: Bibliographic Match point 1: Search index: id-other,st-urx Score: 100 Tag: 024 Subfields: a Normalization rule: None 2) Create a record in Koha with an indexable URI 2a) Default framework 2b) 024 $a http://koha-community.org/test $2 uri 2c) 040 $c test 2d) 245 $a This is a test record 2e) 942 $c Books 2f) Save (save again if cautioned about missing fields as these should autofill) 3) Do a full re-index of Zebra 4) Download your record from Koha as a .mrc file (ie isomarc, binary marc, etc) 5) Go to "Stage MARC records for import" 5a) Upload your .mrc file. 5b) Change your "Record matching rule" to "Test" 5c) Click Stage for import 6) It should say '1 records with at least one match in catalog per matching rule "TEST"'. NOTE: For completeness, you can go through this process on a clean master branch, and note that it will say '0 records with at least one match in catalog per matching rule "TEST"' Signed-off-by: Alex Buckley Signed-off-by: Marcel de Rooy Signed-off-by: Kyle M Hall --- C4/Matcher.pm | 6 ++++-- C4/Search.pm | 9 +++++---- Koha/SearchEngine/Elasticsearch/Search.pm | 6 +++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/C4/Matcher.pm b/C4/Matcher.pm index 40e6e5f030..670b2efed2 100644 --- a/C4/Matcher.pm +++ b/C4/Matcher.pm @@ -649,7 +649,9 @@ sub get_matches { my $total_hits; if ( $self->{'record_type'} eq 'biblio' ) { - if ($QParser) { + #NOTE: The QueryParser can't handle the CCL syntax of 'qualifier','qualifier', so fallback to non-QueryParser. + #NOTE: You can see this in C4::Search::SimpleSearch() as well in a different way. 
+ if ($QParser && $matchpoint->{'index'} !~ m/\w,\w/) { $query = join( " || ", map { "$matchpoint->{'index'}:$_" } @source_keys ); } @@ -662,7 +664,7 @@ sub get_matches { my $searcher = Koha::SearchEngine::Search->new({index => $Koha::SearchEngine::BIBLIOS_INDEX}); ( $error, $searchresults, $total_hits ) = - $searcher->simple_search_compat( $query, 0, $max_matches ); + $searcher->simple_search_compat( $query, 0, $max_matches, undef, skip_normalize => 1 ); } elsif ( $self->{'record_type'} eq 'authority' ) { my $authresults; diff --git a/C4/Search.pm b/C4/Search.pm index 4270c82621..2fd5d793cc 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -160,7 +160,7 @@ sub FindDuplicate { =head2 SimpleSearch -( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] ); +( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers], [%options] ); This function provides a simple search API on the bibliographic catalog @@ -172,6 +172,7 @@ This function provides a simple search API on the bibliographic catalog * @servers is optional. Defaults to biblioserver as found in koha-conf.xml * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef. + * %options is optional. (e.g. "skip_normalize" allows you to skip changing : to = ) =item C @@ -221,7 +222,7 @@ $template->param(result=>\@results); =cut sub SimpleSearch { - my ( $query, $offset, $max_results, $servers ) = @_; + my ( $query, $offset, $max_results, $servers, %options ) = @_; return ( 'No query entered', undef, undef ) unless $query; # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too. 
@@ -243,12 +244,12 @@ sub SimpleSearch { eval { $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); if ($QParser) { - $query =~ s/=/:/g; + $query =~ s/=/:/g unless $options{skip_normalize}; $QParser->parse( $query ); $query = $QParser->target_syntax($servers[$i]); $zoom_queries[$i] = new ZOOM::Query::PQF( $query, $zconns[$i]); } else { - $query =~ s/:/=/g; + $query =~ s/:/=/g unless $options{skip_normalize}; $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]); } $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] ); diff --git a/Koha/SearchEngine/Elasticsearch/Search.pm b/Koha/SearchEngine/Elasticsearch/Search.pm index c3cdb15dad..ae2dd4c68b 100644 --- a/Koha/SearchEngine/Elasticsearch/Search.pm +++ b/Koha/SearchEngine/Elasticsearch/Search.pm @@ -268,7 +268,7 @@ sub count_auth_use { =head2 simple_search_compat my ( $error, $marcresults, $total_hits ) = - $searcher->simple_search( $query, $offset, $max_results ); + $searcher->simple_search( $query, $offset, $max_results, %options ); This is a simpler interface to the searching, intended to be similar enough to L. @@ -291,6 +291,10 @@ How many results to skip from the start of the results. The max number of results to return. The default is 100 (because unlimited is a pretty terrible thing to do.) +=item C<%options> + +These options are unused by Elasticsearch + =back Returns: -- 2.39.5