From 4feb6a4736adbdc809cb5945269c52210e0d393d Mon Sep 17 00:00:00 2001 From: Jared Camins-Esakov Date: Thu, 23 Aug 2012 19:35:22 -0400 Subject: [PATCH] Bug 8211: Add exploded search options When working with hierarchical subject headings, it is sometimes helpful to do a search for all records with a specific subject, plus broader/narrower/related subjects. This patch adds support for these "exploded" subject searches to Koha. To test: 1) Make sure you have a bunch of hierarchical subjects. I created geographical subjects for "Arizona," "United States," and "Phoenix," and linked them together using 551s, and made sure I had a half dozen records linking to each (but not all to all three). 2) Do a search for su-br:Arizona (or choose "Subject and broader terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona" and the records with the subject "United States" 3) Do a search for su-na:Arizona (or choose "Subject and narrower terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona" and the records with the subject "Phoenix" 4) Do a search for su-rl:Arizona (or choose "Subject and related terms" on the advanced search screen with "more options" displayed), and check that you get the records with the subject "Arizona," the records with the subject "United States," and the records with the subject "Phoenix" 5) Ensure that other searches still work (keyword, subject, ccl, whatever) 6) Sign off Technical details: This patch adds a shim in front of C4::Search::buildQuery in order to preprocess the query and call the _handle_exploding_index callback. This shim will allow us to gradually offload query parsing to a new query parser module. 
Signed-off-by: wajasu Signed-off-by: Paul Poulain --- C4/Search.pm | 102 ++++++++++++++++++ .../prog/en/includes/search_indexes.inc | 5 + .../prog/en/modules/opac-advsearch.tt | 5 +- t/db_dependent/Search.t | 80 +++++++++++++- 4 files changed, 190 insertions(+), 2 deletions(-) diff --git a/C4/Search.pm b/C4/Search.pm index b36c5f85c2..2b78f7594d 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -34,6 +34,8 @@ use C4::Charset; use YAML; use URI::Escape; use Business::ISBN; +use MARC::Record; +use MARC::Field; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG); @@ -1029,6 +1031,104 @@ sub getIndexes{ return \@indexes; } +=head2 _handle_exploding_index + + my $query = _handle_exploding_index($index, $term) + +Callback routine to generate the search for "exploding" indexes (i.e. +those indexes which are turned into multiple or-connected searches based +on authority data). + +=cut + +sub _handle_exploding_index { + my ( $index, $term ) = @_; + + return unless ($index =~ m/(su-br|su-na|su-rl)/ && $term); + + my $marcflavour = C4::Context->preference('marcflavour'); + + my $codesubfield = $marcflavour eq 'UNIMARC' ? '5' : 'w'; + my $wantedcodes = ''; + my @subqueries = ( "(su=\"$term\")"); + my ($error, $results, $total_hits) = SimpleSearch( "Heading,wrdl=$term", undef, undef, [ "authorityserver" ] ); + foreach my $auth (@$results) { + my $record = MARC::Record->new_from_usmarc($auth); + my @references = $record->field('5..'); + if (@references) { + if ($index eq 'su-br') { + $wantedcodes = 'g'; + } elsif ($index eq 'su-na') { + $wantedcodes = 'h'; + } elsif ($index eq 'su-rl') { + $wantedcodes = ''; + } + foreach my $reference (@references) { + my $codes = $reference->subfield($codesubfield); + push @subqueries, '(su="' . $reference->as_string('abcdefghijlmnopqrstuvxyz') . 
'")' if (($codes && $codes eq $wantedcodes) || !$wantedcodes); + } + } + } + return join(' or ', @subqueries); +} + +=head2 parseQuery + + ( $operators, $operands, $indexes, $limits, + $sort_by, $scan, $lang ) = + parseQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); + +Shim function to ease the transition from buildQuery to a new QueryParser. +This function is called at the beginning of buildQuery, and modifies +buildQuery's input. If it can handle the input, it returns a query that +buildQuery will not try to parse. +=cut + +sub parseQuery { + my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = @_; + + my @operators = $operators ? @$operators : (); + my @indexes = $indexes ? @$indexes : (); + my @operands = $operands ? @$operands : (); + my @limits = $limits ? @$limits : (); + my @sort_by = $sort_by ? @$sort_by : (); + + my $query = defined $operands[0] ? $operands[0] : ''; + my $index; + my $term; + +# TODO: once we are using QueryParser, all this special case code for +# exploded search indexes will be replaced by a callback to +# _handle_exploding_index + if ( $query =~ m/^(.*)\b(su-br|su-na|su-rl)[:=](\w.*)$/ ) { + $query = $1; + $index = $2; + $term = $3; + } else { + $query = ''; + for ( my $i = 0 ; $i < @operands ; $i++ ) { + if ($operands[$i] && $indexes[$i] =~ m/(su-br|su-na|su-rl)/) { + $index = $indexes[$i]; + $term = $operands[$i]; + } elsif ($operands[$i]) { + $query .= $operators[$i] eq 'or' ? 
' or ' : ' and ' if ($query); + $query .= "($indexes[$i]:$operands[$i])"; + } + } + } + + if ($index) { + my $queryPart = _handle_exploding_index($index, $term); + if ($queryPart) { + $query .= "($queryPart)"; + } + $operators = []; + $operands[0] = "ccl=$query"; + } + + return ( $operators, \@operands, $indexes, $limits, $sort_by, $scan, $lang); +} + =head2 buildQuery ( $error, $query, @@ -1050,6 +1150,8 @@ sub buildQuery { warn "---------\nEnter buildQuery\n---------" if $DEBUG; + ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang) = parseQuery($operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); + # dereference my @operators = $operators ? @$operators : (); my @indexes = $indexes ? @$indexes : (); diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc index 241321619d..09e748b556 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc +++ b/koha-tmpl/intranet-tmpl/prog/en/includes/search_indexes.inc @@ -13,6 +13,11 @@ + [% IF ( expanded_options ) %] + + + + [% END %] diff --git a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt index 6ed2b86710..cca5d0de01 100644 --- a/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt +++ b/koha-tmpl/opac-tmpl/prog/en/modules/opac-advsearch.tt @@ -67,8 +67,11 @@