Bug 8211: Add exploded search options
When working with hierarchical subject headings, it is sometimes helpful to do a search for all records with a specific subject, plus broader/narrower/related subjects. This patch adds support for these "exploded" subject searches to Koha. To test: 1) Make sure you have a bunch of hierarchical subjects. I created geographical subjects for "Arizona," "United States," and "Phoenix," and linked them together using 551s, and made sure I had a half dozen records linking to each (but not all to all three). 2) Do a search for su-br:Arizona (or choose "Subject and broader terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona" and the records with the subject "United States". 3) Do a search for su-na:Arizona (or choose "Subject and narrower terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona" and the records with the subject "Phoenix". 4) Do a search for su-rl:Arizona (or choose "Subject and related terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona," the records with the subject "United States," and the records with the subject "Phoenix". 5) Ensure that other searches still work (keyword, subject, ccl, whatever) 6) Sign off Technical details: This patch adds a shim in front of C4::Search::buildQuery in order to preprocess the query and call the _handle_exploding_index callback. This shim will allow us to gradually offload query parsing to a new query parser module. Signed-off-by: wajasu <matted-34813@mypacks.net> Signed-off-by: Paul Poulain <paul.poulain@biblibre.com>
This commit is contained in:
parent
401d9e4479
commit
4feb6a4736
4 changed files with 190 additions and 2 deletions
102
C4/Search.pm
102
C4/Search.pm
|
@ -34,6 +34,8 @@ use C4::Charset;
|
|||
use YAML;
|
||||
use URI::Escape;
|
||||
use Business::ISBN;
|
||||
use MARC::Record;
|
||||
use MARC::Field;
|
||||
|
||||
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
|
||||
|
||||
|
@ -1029,6 +1031,104 @@ sub getIndexes{
|
|||
return \@indexes;
|
||||
}
|
||||
|
||||
=head2 _handle_exploding_index

    my $query = _handle_exploding_index($index, $term)

Callback routine to generate the search for "exploding" indexes (i.e.
those indexes which are turned into multiple or-connected searches based
on authority data).

C<$index> must contain one of C<su-br> (subject and broader terms),
C<su-na> (subject and narrower terms) or C<su-rl> (subject and all
related terms). C<$term> is the subject heading to explode.

The authority server is searched for C<$term>, and every 5XX field of
every authority returned is considered. For C<su-br> only references
whose relationship code starts with C<g> are kept, for C<su-na> only
those starting with C<h>, and for C<su-rl> every reference is kept. The
relationship code is read from subfield C<$5> when the C<marcflavour>
system preference is C<UNIMARC> and from subfield C<$w> otherwise.

Returns a string of the form C<(su="term") or (su="reference") or ...>,
always starting with the original term. Returns nothing (undef in scalar
context) when C<$index> is not an exploding index or C<$term> is false.

=cut

sub _handle_exploding_index {
    my ( $index, $term ) = @_;

    return unless ( defined $index && $term );

    # Work from the captured relation rather than the raw index so that an
    # index carrying extra text (e.g. "su-br,phr") still selects the right
    # relationship instead of silently falling through to "related".
    my ($relation) = $index =~ m/(su-br|su-na|su-rl)/;
    return unless $relation;

    # Relationship code wanted in the reference's control subfield; the
    # empty string means "accept every reference".
    my %wanted_code_for = (
        'su-br' => 'g',
        'su-na' => 'h',
        'su-rl' => '',
    );
    my $wantedcode = $wanted_code_for{$relation};

    my $marcflavour  = C4::Context->preference('marcflavour');
    my $codesubfield = $marcflavour eq 'UNIMARC' ? '5' : 'w';

    # NOTE(review): $term is interpolated into the query unescaped; a term
    # containing a double quote will produce an unbalanced CCL string.
    my @subqueries = ( "(su=\"$term\")" );
    my %seen       = ( $subqueries[0] => 1 );

    my ( undef, $results ) = SimpleSearch( "Heading,wrdl=$term", undef, undef, [ "authorityserver" ] );

    foreach my $auth ( @{ $results || [] } ) {
        my $record = MARC::Record->new_from_usmarc($auth);

        foreach my $reference ( $record->field('5..') ) {
            if ( $wantedcode ne '' ) {
                my $code = $reference->subfield($codesubfield);

                # Only the first character of the control subfield carries
                # the relationship; later positions may be present.
                next unless ( defined $code && substr( $code, 0, 1 ) eq $wantedcode );
            }

            my $subquery = '(su="' . $reference->as_string('abcdefghijlmnopqrstuvxyz') . '")';

            # Several authorities may point at the same heading; searching
            # for it once is enough.
            push @subqueries, $subquery unless $seen{$subquery}++;
        }
    }

    return join( ' or ', @subqueries );
}
|
||||
|
||||
=head2 parseQuery

    ( $operators, $operands, $indexes, $limits,
      $sort_by, $scan, $lang ) =
            parseQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);

Shim function to ease the transition from buildQuery to a new QueryParser.
This function is called at the beginning of buildQuery, and modifies
buildQuery's input. If it can handle the input, it returns a query that
buildQuery will not try to parse.

At present the only input it handles is a search on one of the exploding
subject indexes (C<su-br>, C<su-na>, C<su-rl>), given either inline in the
first operand (C<su-br:Arizona>) or as an entry of C<$indexes>. In that
case the first operand is replaced with a C<ccl=> query containing the
exploded subject search and C<$operators> is returned as an empty array
reference. If several operands use an exploding index only the last one
is exploded. Any other input is returned unchanged, except that
C<$operands> is always returned as a fresh array reference.

=cut

sub parseQuery {
    my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_;

    my @operators = $operators ? @$operators : ();
    my @indexes   = $indexes   ? @$indexes   : ();
    my @operands  = $operands  ? @$operands  : ();

    my $query = defined $operands[0] ? $operands[0] : '';
    my $index;
    my $term;

    # True when $query was rebuilt from the operand list and therefore does
    # not end in a boolean operator.
    my $rebuilt = 0;

# TODO: once we are using QueryParser, all this special case code for
#       exploded search indexes will be replaced by a callback to
#       _handle_exploding_index
    if ( $query =~ m/^(.*)\b(su-br|su-na|su-rl)[:=](\w.*)$/ ) {
        # Inline form: keep whatever preceded the exploding index as-is.
        $query = $1;
        $index = $2;
        $term  = $3;
    }
    else {
        $query   = '';
        $rebuilt = 1;
        for my $i ( 0 .. $#operands ) {
            next unless $operands[$i];

            my $operand_index = defined $indexes[$i] ? $indexes[$i] : '';
            if ( $operand_index =~ m/(su-br|su-na|su-rl)/ ) {
                $index = $operand_index;
                $term  = $operands[$i];
                next;
            }

            if ($query) {
                # NOTE(review): the operator is looked up at the operand's
                # own position; confirm this matches how buildQuery pairs
                # operators with operands.
                my $operator = defined $operators[$i] ? $operators[$i] : '';
                $query .= $operator eq 'or' ? ' or ' : ' and ';
            }
            $query .= "($operand_index:$operands[$i])";
        }
    }

    if ($index) {
        my $queryPart = _handle_exploding_index($index, $term);
        if ($queryPart) {
            # A rebuilt query has no trailing operator, so join explicitly
            # instead of emitting two adjacent parenthesised groups.
            $query .= ' and ' if ( $rebuilt && $query ne '' );
            $query .= "($queryPart)";
        }

        # Everything now lives in the single ccl= operand.
        $operators = [];
        $operands[0] = "ccl=$query";
    }

    return ( $operators, \@operands, $indexes, $limits, $sort_by, $scan, $lang);
}
|
||||
|
||||
=head2 buildQuery
|
||||
|
||||
( $error, $query,
|
||||
|
@ -1050,6 +1150,8 @@ sub buildQuery {
|
|||
|
||||
warn "---------\nEnter buildQuery\n---------" if $DEBUG;
|
||||
|
||||
( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = parseQuery($operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
|
||||
|
||||
# dereference
|
||||
my @operators = $operators ? @$operators : ();
|
||||
my @indexes = $indexes ? @$indexes : ();
|
||||
|
|
|
@ -13,6 +13,11 @@
|
|||
<option value="se"> Series title</option>
|
||||
<option value="su">Subject</option>
|
||||
<option value="su,phr"> Subject as phrase</option>
|
||||
[% IF ( expanded_options ) %]
|
||||
<option value="su-br"> Subject and broader terms</option>
|
||||
<option value="su-na"> Subject and narrower terms</option>
|
||||
<option value="su-rl"> Subject and related terms</option>
|
||||
[% END %]
|
||||
<option value="bc">Barcode</option>
|
||||
<option value="location">Shelving location</option>
|
||||
<option value="sn">Standard number</option>
|
||||
|
|
|
@ -67,8 +67,11 @@
|
|||
<select name="idx">
|
||||
<option value="kw">Keyword</option>
|
||||
<option value="su,wrdl">Subject</option>
|
||||
[% IF ( search_boxes_loo.expanded_options ) %]
|
||||
[% IF ( expanded_options ) %]
|
||||
<option value="su,phr"> Subject phrase</option>
|
||||
<option value="su-br"> Subject and broader terms</option>
|
||||
<option value="su-na"> Subject and narrower terms</option>
|
||||
<option value="su-rl"> Subject and related terms</option>
|
||||
[% END %]
|
||||
<option value="ti">Title</option>
|
||||
[% IF ( expanded_options ) %]
|
||||
|
|
|
@ -12,7 +12,7 @@ use YAML;
|
|||
use C4::Debug;
|
||||
require C4::Context;
|
||||
|
||||
use Test::More tests => 57;
|
||||
use Test::More tests => 78;
|
||||
use Test::MockModule;
|
||||
use MARC::Record;
|
||||
use File::Spec;
|
||||
|
@ -515,4 +515,82 @@ END {
|
|||
}
|
||||
}
|
||||
|
||||
# Testing exploding indexes
#
# SimpleSearch is mocked so that no authority server is needed: a search
# for "Arizona" returns one authority record with a broader reference
# (United States, $w g), three narrower references (the counties, $w h)
# and one reference with no relationship code (New Mexico).
my $term;
my $searchmodule = Test::MockModule->new('C4::Search');
$searchmodule->mock('SimpleSearch', sub {
    my $query = shift;

    is($query, "Heading,wrdl=$term", "Searching for expected term '$term' for exploding") or return '', [], 0;

    my $record = MARC::Record->new;
    if ($query =~ m/Arizona/) {
        $record->add_fields(
            [ '001', '1234' ],
            [ '151', ' ', ' ', a => 'Arizona' ],
            [ '551', ' ', ' ', a => 'United States', w => 'g' ],
            [ '551', ' ', ' ', a => 'Maricopa County', w => 'h' ],
            [ '551', ' ', ' ', a => 'Navajo County', w => 'h' ],
            [ '551', ' ', ' ', a => 'Pima County', w => 'h' ],
            [ '551', ' ', ' ', a => 'New Mexico' ],
        );
    }
    return '', [ $record->as_usmarc() ], 1;
});

$term = 'Arizona';

# Expected headings for each kind of explosion. "Related" follows every
# reference, including the one that carries no relationship code.
my @broader  = ( 'Arizona', 'United States' );
my @narrower = ( 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County' );
my @related  = ( 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County', 'New Mexico' );

# Runs buildQuery with the given operands and indexes, storing all of its
# return values in the file-level variables, and returns the query.
my $build_exploded = sub {
    my ( $operands, $indexes ) = @_;
    ( $error, $query, $simple_query, $query_cgi,
      $query_desc, $limit, $limit_cgi, $limit_desc,
      $stopwords_removed, $query_type ) = buildQuery([], $operands, $indexes, [ ], [], 0, 'en');
    return $query;
};

# Advanced search: the exploding index arrives in the index list.
$build_exploded->( [ $term ], [ 'su-br' ] );
matchesExplodedTerms("Advanced search for broader subjects", $query, @broader);

$build_exploded->( [ $term ], [ 'su-na' ] );
matchesExplodedTerms("Advanced search for narrower subjects", $query, @narrower);

$build_exploded->( [ $term ], [ 'su-rl' ] );
matchesExplodedTerms("Advanced search for related subjects", $query, @related);

$build_exploded->( [ "$term", 'history' ], [ 'su-rl', 'kw' ] );
matchesExplodedTerms("Advanced search for related subjects and keyword 'history' searches related subjects", $query, @related);
like($query, qr/history/, "Advanced search for related subjects and keyword 'history' searches for 'history'");

$build_exploded->( [ 'history', "$term" ], [ 'kw', 'su-rl' ] );
matchesExplodedTerms("Order of terms doesn't matter for advanced search", $query, @related);
like($query, qr/history/, "Order of terms doesn't matter for advanced search");

# Simple search: the exploding index is written inline in the operand.
$build_exploded->( [ "su-br:$term" ], [ ] );
matchesExplodedTerms("Simple search for broader subjects", $query, @broader);

$build_exploded->( [ "su-na:$term" ], [ ] );
matchesExplodedTerms("Simple search for narrower subjects", $query, @narrower);

$build_exploded->( [ "su-rl:$term" ], [ ] );
matchesExplodedTerms("Simple search for related subjects", $query, @related);

$build_exploded->( [ "history and su-rl:$term" ], [ ] );
matchesExplodedTerms("Simple search for related subjects and keyword 'history' searches related subjects", $query, @related);
like($query, qr/history/, "Simple search for related subjects and keyword 'history' searches for 'history'");
|
||||
|
||||
# Asserts (via Test::More's like) that $query contains a run of exactly
# scalar(@terms) consecutive groups of the form (su="TERM"), optionally
# separated by " or ", where each TERM is one of @terms. The order of the
# groups is not checked. $message is used as the test description.
# Returns whatever like() returns.
sub matchesExplodedTerms {
    my ($message, $query, @terms) = @_;

    # Escape each alternative so headings containing regex metacharacters
    # (parentheses, "+", "." ...) are matched literally.
    my $alternatives = join( '|', map { quotemeta("su=\"$_\"") } @terms );
    my $count        = scalar(@terms);

    my $match = qr/(?:(?: or )?\((?:$alternatives)\)){$count}/;
    return like($query, $match, $message);
}
|
||||
|
||||
1;
|
||||
|
|
Loading…
Reference in a new issue