Bug 8211: Add exploded search options

When working with hierarchical subject headings, it is sometimes helpful
to do a search for all records with a specific subject, plus
broader/narrower/related subjects. This patch adds support for these
"exploded" subject searches to Koha.

To test:
1) Make sure you have a bunch of hierarchical subjects. I created
   geographical subjects for "Arizona," "United States," and "Phoenix,"
   and linked them together using 551s, and made sure I had a half
   dozen records linking to each (but not all to all three).
2) Do a search for su-br:Arizona (or choose "Subject and broader terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona" and the
   records with the subject "United States"
3) Do a search for su-na:Arizona (or choose "Subject and narrower terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona" and the
   records with the subject "Phoenix"
4) Do a search for su-rl:Arizona (or choose "Subject and related terms"
   on the advanced search screen with "more options" displayed), and
   check that you get the records with the subject "Arizona," the
   records with the subject "United States," and the records with the
   subject "Phoenix"
5) Ensure that other searches still work (keyword, subject, ccl,
   whatever)
6) Sign off

Technical details:
This patch adds a shim in front of C4::Search::buildQuery in order to
preprocess the query and call the _handle_exploding_index callback.
This shim will allow us to gradually offload query parsing to a new
query parser module.

Signed-off-by: wajasu <matted-34813@mypacks.net>
Signed-off-by: Paul Poulain <paul.poulain@biblibre.com>
This commit is contained in:
Jared Camins-Esakov 2012-08-23 19:35:22 -04:00 committed by Paul Poulain
parent 401d9e4479
commit 4feb6a4736
4 changed files with 190 additions and 2 deletions

View file

@ -34,6 +34,8 @@ use C4::Charset;
use YAML;
use URI::Escape;
use Business::ISBN;
use MARC::Record;
use MARC::Field;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
@ -1029,6 +1031,104 @@ sub getIndexes{
return \@indexes;
}
=head2 _handle_exploding_index

    my $query = _handle_exploding_index($index, $term)

Callback routine to generate the search for "exploding" indexes (i.e.
those indexes which are turned into multiple or-connected searches based
on authority data).

C<$index> selects which 5XX references of the matching authority records
are followed: C<su-br> keeps references whose relationship code is C<g>,
C<su-na> keeps those coded C<h>, and C<su-rl> keeps every reference. The
relationship code is read from subfield C<5> when the C<marcflavour>
preference is C<UNIMARC> and from subfield C<w> otherwise.

Returns a string of C<(su="...")> clauses joined with " or "; the first
clause is always the searched term itself. Double quotes are removed from
the text placed inside each clause, references without heading text are
skipped, and a heading is only listed once. Returns nothing when C<$index>
is not one of the exploding indexes or C<$term> is empty.

=cut

sub _handle_exploding_index {
    my ( $index, $term ) = @_;

    return unless ( defined $index && $index =~ m/(su-br|su-na|su-rl)/ && $term );

    my $marcflavour  = C4::Context->preference('marcflavour');
    my $codesubfield = $marcflavour eq 'UNIMARC' ? '5' : 'w';

    # Relationship code a reference must carry to be followed; an empty
    # string means "follow every reference". Anything that passed the guard
    # above without being an exact index name is treated like su-rl.
    my %wanted_code_for = ( 'su-br' => 'g', 'su-na' => 'h', 'su-rl' => '' );
    my $wantedcodes = exists $wanted_code_for{$index} ? $wanted_code_for{$index} : '';

    # Builds one quoted subject clause. An embedded double quote would end
    # the quoted phrase early and corrupt the generated query, so drop it.
    my $subject_clause = sub {
        my ($heading) = @_;
        $heading =~ s/"//g;
        return '(su="' . $heading . '")';
    };

    my @subqueries = ( $subject_clause->($term) );
    my %seen       = ( $subqueries[0] => 1 );

    my ( undef, $results ) = SimpleSearch( "Heading,wrdl=$term", undef, undef, ["authorityserver"] );

    # When the authority lookup yields no result list, only the term itself
    # is searched.
    for my $auth ( @{ $results || [] } ) {
        my $record = MARC::Record->new_from_usmarc($auth);
        for my $reference ( $record->field('5..') ) {
            my $codes = $reference->subfield($codesubfield);
            next unless !$wantedcodes || ( $codes && $codes eq $wantedcodes );

            # The relationship subfield (w) is deliberately left out of the
            # heading text.
            my $heading = $reference->as_string('abcdefghijlmnopqrstuvxyz');
            next unless defined $heading;
            $heading =~ s/"//g;
            next unless length $heading;

            my $clause = $subject_clause->($heading);
            next if $seen{$clause}++;
            push @subqueries, $clause;
        }
    }

    return join( ' or ', @subqueries );
}
=head2 parseQuery

    ( $operators, $operands, $indexes, $limits,
      $sort_by, $scan, $lang ) =
            parseQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);

Shim function to ease the transition from buildQuery to a new QueryParser.
This function is called at the beginning of buildQuery, and modifies
buildQuery's input. If it can handle the input, it returns a query that
buildQuery will not try to parse.

At present the only input it handles is a search on one of the exploding
subject indexes (C<su-br>, C<su-na>, C<su-rl>), given either inline in the
first operand (C<su-br:Arizona>) or as an operand whose index is one of
those names. In that case the first returned operand is replaced by
C<ccl=> followed by the remaining operands and the group of subject clauses
produced by C<_handle_exploding_index>, and the returned operators are
undefined. If several operands use an exploding index, only the last one is
exploded. Any other input is returned as received (the operands in a fresh
array reference).

=cut

sub parseQuery {
    my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang ) = @_;

    my @operators = $operators ? @$operators : ();
    my @indexes   = $indexes   ? @$indexes   : ();
    my @operands  = $operands  ? @$operands  : ();

    my $query = defined $operands[0] ? $operands[0] : '';
    my ( $index, $term );

    # Text placed between the ordinary part of the query and the exploded
    # group; stays empty when there is nothing to join.
    my $joiner = '';

    # Maps a user-selected operator onto the CCL connective; anything other
    # than 'or' (including a missing operator) means 'and'.
    my $connective = sub {
        my ($operator) = @_;
        return ( defined $operator && $operator eq 'or' ) ? ' or ' : ' and ';
    };

    # TODO: once we are using QueryParser, all this special case code for
    # exploded search indexes will be replaced by a callback to
    # _handle_exploding_index
    if ( $query =~ m/^(.*)\b(su-br|su-na|su-rl)[:=](\w.*)$/ ) {
        ( $query, $index, $term ) = ( $1, $2, $3 );

        # A free-text prefix that does not already end in a boolean operator
        # needs one before the parenthesised group that gets appended.
        if ( $query =~ m/[\w")]\s*$/ && $query !~ m/\b(?:and|or|not)\s*$/i ) {
            $query =~ s/\s+$//;
            $joiner = ' and ';
        }
    }
    else {
        $query = '';
        my $exploded_at;

        # NOTE(review): assumes $operators->[ $i - 1 ] is the operator chosen
        # between operand $i - 1 and operand $i - confirm against buildQuery.
        for my $i ( 0 .. $#operands ) {
            my $operand = $operands[$i];
            next unless $operand;

            my $operand_index = defined $indexes[$i] ? $indexes[$i] : '';
            if ( $operand_index =~ m/(su-br|su-na|su-rl)/ ) {
                $index       = $operand_index;
                $term        = $operand;
                $exploded_at = $i;
                next;
            }

            $query .= $connective->( $i > 0 ? $operators[ $i - 1 ] : undef )
              if length $query;
            $query .= length $operand_index
              ? "($operand_index:$operand)"
              : "($operand)";
        }

        # The exploded group is appended last, so join it with the operator
        # adjacent to the exploded operand (the one after it when it came
        # first).
        if ( defined $exploded_at && length $query ) {
            $joiner = $connective->( $operators[ $exploded_at > 0 ? $exploded_at - 1 : 0 ] );
        }
    }

    if ($index) {
        my $queryPart = _handle_exploding_index( $index, $term );
        if ($queryPart) {
            $query .= "$joiner($queryPart)";
        }

        # Everything is now folded into a single ccl= operand, so there are
        # no operators left for the caller to apply.
        $operators   = undef;
        $operands[0] = "ccl=$query";
    }

    return ( $operators, \@operands, $indexes, $limits, $sort_by, $scan, $lang );
}
=head2 buildQuery
( $error, $query,
@ -1050,6 +1150,8 @@ sub buildQuery {
warn "---------\nEnter buildQuery\n---------" if $DEBUG;
( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = parseQuery($operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
# dereference
my @operators = $operators ? @$operators : ();
my @indexes = $indexes ? @$indexes : ();

View file

@ -13,6 +13,11 @@
<option value="se">&nbsp;&nbsp;&nbsp;&nbsp; Series title</option>
<option value="su">Subject</option>
<option value="su,phr">&nbsp;&nbsp;&nbsp;&nbsp; Subject as phrase</option>
[% IF ( expanded_options ) %]
<option value="su-br">&nbsp;&nbsp;&nbsp;&nbsp; Subject and broader terms</option>
<option value="su-na">&nbsp;&nbsp;&nbsp;&nbsp; Subject and narrower terms</option>
<option value="su-rl">&nbsp;&nbsp;&nbsp;&nbsp; Subject and related terms</option>
[% END %]
<option value="bc">Barcode</option>
<option value="location">Shelving location</option>
<option value="sn">Standard number</option>

View file

@ -67,8 +67,11 @@
<select name="idx">
<option value="kw">Keyword</option>
<option value="su,wrdl">Subject</option>
[% IF ( search_boxes_loo.expanded_options ) %]
[% IF ( expanded_options ) %]
<option value="su,phr">&nbsp;&nbsp;&nbsp;&nbsp; Subject phrase</option>
<option value="su-br">&nbsp;&nbsp;&nbsp;&nbsp; Subject and broader terms</option>
<option value="su-na">&nbsp;&nbsp;&nbsp;&nbsp; Subject and narrower terms</option>
<option value="su-rl">&nbsp;&nbsp;&nbsp;&nbsp; Subject and related terms</option>
[% END %]
<option value="ti">Title</option>
[% IF ( expanded_options ) %]

View file

@ -12,7 +12,7 @@ use YAML;
use C4::Debug;
require C4::Context;
use Test::More tests => 57;
use Test::More tests => 78;
use Test::MockModule;
use MARC::Record;
use File::Spec;
@ -515,4 +515,82 @@ END {
}
}
# Testing exploding indexes

# Term the next buildQuery call is expected to look up; the SimpleSearch mock
# below reads it to check the authority query it is handed.
my $term;

# Replace C4::Search::SimpleSearch so no authority server is needed.
my $searchmodule = Test::MockModule->new('C4::Search');
$searchmodule->mock('SimpleSearch', sub {
    my $query = shift;

    # Counts as one test per call; on a mismatch hand back an empty result set.
    is($query, "Heading,wrdl=$term", "Searching for expected term '$term' for exploding") or return '', [], 0;

    # Authority record for "Arizona" with one reference coded g, three coded h
    # and one without a relationship code. Any other query gets an empty record.
    my $record = MARC::Record->new;
    if ($query =~ m/Arizona/) {
        $record->add_fields(
            [ '001', '1234' ],
            [ '151', ' ', ' ', a => 'Arizona' ],
            [ '551', ' ', ' ', a => 'United States', w => 'g' ],
            [ '551', ' ', ' ', a => 'Maricopa County', w => 'h' ],
            [ '551', ' ', ' ', a => 'Navajo County', w => 'h' ],
            [ '551', ' ', ' ', a => 'Pima County', w => 'h' ],
            [ '551', ' ', ' ', a => 'New Mexico' ],
        );
    }

    # Same shape the callers unpack: error, list of raw records, hit count.
    return '', [ $record->as_usmarc() ], 1;
});
$term = 'Arizona';

{
    # Subject clauses each kind of exploded search is expected to contain.
    my @broader_terms  = ( 'Arizona', 'United States' );
    my @narrower_terms = ( 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County' );
    my @related_terms  = ( 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County' );

    # Each case: operands, indexes, message for the exploded-terms check,
    # expected terms and, optionally, the message for an extra check that the
    # keyword 'history' survived in the generated query.
    my @exploding_cases = (
        [ [ $term ], [ 'su-br' ], "Advanced search for broader subjects", \@broader_terms ],
        [ [ $term ], [ 'su-na' ], "Advanced search for narrower subjects", \@narrower_terms ],
        [ [ $term ], [ 'su-rl' ], "Advanced search for related subjects", \@related_terms ],
        [
            [ "$term", 'history' ], [ 'su-rl', 'kw' ],
            "Advanced search for related subjects and keyword 'history' searches related subjects",
            \@related_terms,
            "Advanced search for related subjects and keyword 'history' searches for 'history'",
        ],
        [
            [ 'history', "$term" ], [ 'kw', 'su-rl' ],
            "Order of terms doesn't matter for advanced search",
            \@related_terms,
            "Order of terms doesn't matter for advanced search",
        ],
        [ [ "su-br:$term" ], [ ], "Simple search for broader subjects", \@broader_terms ],
        [ [ "su-na:$term" ], [ ], "Simple search for narrower subjects", \@narrower_terms ],
        [ [ "su-rl:$term" ], [ ], "Simple search for related subjects", \@related_terms ],
        [
            [ "history and su-rl:$term" ], [ ],
            "Simple search for related subjects and keyword 'history' searches related subjects",
            \@related_terms,
            "Simple search for related subjects and keyword 'history' searches for 'history'",
        ],
    );

    for my $case (@exploding_cases) {
        my ( $case_operands, $case_indexes, $message, $expected_terms, $keyword_message ) = @$case;

        ( $error, $query, $simple_query, $query_cgi,
          $query_desc, $limit, $limit_cgi, $limit_desc,
          $stopwords_removed, $query_type ) = buildQuery([], $case_operands, $case_indexes, [ ], [], 0, 'en');

        matchesExplodedTerms($message, $query, @$expected_terms);
        like($query, qr/history/, $keyword_message) if defined $keyword_message;
    }
}
# Checks that $query contains a run of scalar(@terms) parenthesised subject
# clauses, each optionally preceded by " or " and each being su="TERM" for one
# of the given terms. Reports the outcome as one test named $message and
# returns the result of that test.
sub matchesExplodedTerms {
    my ( $message, $query, @terms ) = @_;

    # Terms are literal heading text, not patterns: escape them so characters
    # such as parentheses or '+' in a heading are matched verbatim.
    my $alternatives = join( '|', map { quotemeta( 'su="' . $_ . '"' ) } @terms );
    my $match = '(?:(?: or )?\\((?:' . $alternatives . ')\\)){' . scalar(@terms) . '}';

    return like( $query, qr/$match/, $message );
}
1;