From a3a4bfc5e69bcb2757d33813e19176afcaf0019a Mon Sep 17 00:00:00 2001 From: Paul POULAIN Date: Mon, 24 Sep 2007 16:26:35 +0200 Subject: [PATCH] IMPROVEMENT : reintroducing stopwords management Zebra (and NoZebra) don't deal with stopwords. They search for the exact string the user types. This commit : - reintroduces the stopword management script that was in 2.2 - adds stopword management to Zebra & NoZebra searches. PS : fortunately, the stopwords table was not removed by updatedatabase, so nothing to do here. (& the table is in kohastructure.sql) Signed-off-by: Chris Cormack --- C4/Search.pm | 6 + admin/stopwords.pl | 173 ++++++++++++++++++ .../prog/en/modules/admin/admin-home.tmpl | 4 + .../prog/en/modules/admin/stopwords.tmpl | 152 +++++++++++++++ 4 files changed, 335 insertions(+) create mode 100755 admin/stopwords.pl create mode 100644 koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl diff --git a/C4/Search.pm b/C4/Search.pm index e6cecd3d6f..1fe9a64058 100755 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -532,6 +532,7 @@ sub buildQuery { } ); + # STEP I: determine if this is a form-based / simple query or if it's complex (if complex, # we can't handle field weighting, stemming until a formal query parser is written # I'll work on this soon -- JF @@ -562,6 +563,10 @@ sub buildQuery { # if there was, we can apply the current operator for ( my $i = 0 ; $i <= @operands ; $i++ ) { my $operand = $operands[$i]; + # remove stopwords from operand : parse all stopwords & remove them (case insensitive) + foreach (keys %{C4::Context->stopwords}) { + $operand=~ s/\b$_\b//i; + } my $index = $indexes[$i]; my $stemmed_operand; my $stemming = C4::Context->parameters("Stemming") || 0; @@ -1268,6 +1273,7 @@ sub NZanalyse { my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? 
AND value LIKE ?"); # split each word, query the DB and build the biblionumbers result foreach (split / /,$string) { + next if C4::Context->stopwords->{uc($_)}; # skip if stopword #warn "search on all indexes on $_"; my $biblionumbers; next unless $_; diff --git a/admin/stopwords.pl b/admin/stopwords.pl new file mode 100755 index 0000000000..a3efc7d0a3 --- /dev/null +++ b/admin/stopwords.pl @@ -0,0 +1,173 @@ +#!/usr/bin/perl + +#script to administer the stopwords table +#written 20/02/2002 by paul.poulain@free.fr +# This software is placed under the gnu General Public License, v2 (http://www.gnu.org/licenses/gpl.html) + +# ALGO : +# this script use an $op to know what to do. +# if $op is empty or none of the above values, +# - the default screen is build (with all records, or filtered datas). +# - the user can clic on add, modify or delete record. +# if $op=add_form +# - if primkey exists, this is a modification,so we read the $primkey record +# - builds the add/modify form +# if $op=add_validate +# - the user has just send datas, so we create/modify the record +# if $op=delete_form +# - we show the record having primkey=$primkey and ask for deletion validation form +# if $op=delete_confirm +# - we delete the record having primkey=$primkey + + +# Copyright 2000-2002 Katipo Communications +# +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+use strict;
+use warnings;
+use CGI;
+use C4::Context;
+use C4::Output;
+use C4::Search;
+use C4::Auth;
+
+# StringSearch($env, $searchstring, $type)
+#
+# Looks up the stopwords that start with the first whitespace-separated word
+# of $searchstring, ordered by word. An empty or undefined $searchstring
+# matches every stopword. $env and $type are not used by this sub.
+#
+# Returns ($count, \@rows); every row is a hashref holding the 'word' column.
+sub StringSearch {
+    my ($env, $searchstring, $type) = @_;
+    my $dbh = C4::Context->dbh;
+
+    # The value is bound through a placeholder, so it needs no manual quoting.
+    my ($prefix) = split ' ', (defined $searchstring ? $searchstring : '');
+    $prefix = '' unless defined $prefix;
+
+    my $sth = $dbh->prepare("Select word from stopwords where (word like ?) order by word");
+    $sth->execute("$prefix%");
+    my @results;
+    while (my $row = $sth->fetchrow_hashref) {
+        push @results, $row;
+    }
+    $sth->finish;
+    return (scalar @results, \@results);
+}
+
+my $input       = CGI->new;
+my $searchfield = $input->param('searchfield');
+my $offset      = $input->param('offset');
+my $op          = $input->param('op');
+my $script_name = "/cgi-bin/koha/admin/stopwords.pl";
+my $pagesize    = 20;
+
+# A missing parameter behaves like an empty one; anything that is not a
+# non-negative integer is rejected as an offset so paging never starts below 0.
+$searchfield = '' unless defined $searchfield;
+$searchfield =~ s/\,//g;
+$op     = '' unless defined $op;
+$offset = 0  unless defined $offset && $offset =~ /\A\d+\z/;
+
+my ($template, $loggedinuser, $cookie)
+    = get_template_and_user({template_name => "admin/stopwords.tmpl",
+                             query => $input,
+                             type => "intranet",
+                             flagsrequired => {parameters => 1, management => 1},
+                             authnotrequired => 0,
+                             debug => 1,
+                             });
+
+$template->param(script_name => $script_name,
+                 searchfield => $searchfield);
+
+################## ADD_FORM ##################################
+# Shows the form used to add a word, or to modify the word named by searchfield.
+if ($op eq 'add_form') {
+    # searchfield was already handed to the template above. The row this branch
+    # used to re-read from the stopwords table was never passed to the
+    # template, so that lookup has been dropped.
+    $template->param(add_form => 1);
+    # END $OP eq ADD_FORM
+################## ADD_VALIDATE ##################################
+# called by add_form, used to insert/modify data in DB
+} elsif ($op eq 'add_validate') {
+    $template->param(add_validate => 1);
+    my $dbh = C4::Context->dbh;
+    my $words = $input->param('word');
+    $words = '' unless defined $words;
+    # Several words may be submitted at once, separated by spaces and/or commas.
+    # Empty tokens produced by leading or repeated separators are skipped so
+    # that an empty stopword is never stored.
+    my @tab = grep { length } split /[ ,]+/, $words;
+    my $sth = $dbh->prepare("replace stopwords (word) values (?)");
+    foreach my $insert_value (@tab) {
+        $sth->execute($insert_value);
+    }
+    $sth->finish;
+    # END $OP eq ADD_VALIDATE
+################## DELETE_CONFIRM ##################################
+# called by default form, used to confirm deletion of data in DB
+} elsif ($op eq 'delete_confirm') {
+    # Only the confirmation flag is needed: the word to delete is searchfield,
+    # which the template already has. The row formerly fetched here was unused.
+    $template->param(delete_confirm => 1);
+    # END $OP eq DELETE_CONFIRM
+################## DELETE_CONFIRMED ##################################
+# called by delete_confirm, used to effectively confirm deletion of data in DB
+} elsif ($op eq 'delete_confirmed') {
+    $template->param(delete_confirmed => 1);
+    my $dbh = C4::Context->dbh;
+    my $sth = $dbh->prepare("delete from stopwords where word=?");
+    $sth->execute($searchfield);
+    $sth->finish;
+    # END $OP eq DELETE_CONFIRMED
+################## DEFAULT ##################################
+} else { # DEFAULT
+    $template->param(else => 1);
+
+    my $env;
+    my ($count, $results) = StringSearch($env, $searchfield, 'web');
+
+    # Index just past the last row of the page being displayed.
+    my $page_end = $offset + $pagesize < $count ? $offset + $pagesize : $count;
+    my @loop;
+    my $toggle = 'white';
+    for (my $i = $offset; $i < $page_end; $i++) {
+        push @loop, { word => $results->[$i]{'word'}, toggle => $toggle };
+        # Alternate the background colour from one row to the next.
+        $toggle = $toggle eq 'white' ? '#ffffcc' : 'white';
+    }
+    $template->param(loop => \@loop);
+
+    if ($offset > 0) {
+        # Never link to a negative offset when fewer than $pagesize rows precede.
+        $template->param(offsetgtzero => 1,
+                         prevpage => $offset > $pagesize ? $offset - $pagesize : 0);
+    }
+    if ($offset + $pagesize < $count) {
+        $template->param(ltcount => 1,
+                         nextpage => $offset + $pagesize);
+    }
+}
+$template->param(intranetcolorstylesheet => C4::Context->preference("intranetcolorstylesheet"),
+                 intranetstylesheet => C4::Context->preference("intranetstylesheet"),
+                 IntranetNav => C4::Context->preference("IntranetNav"),
+                 );
+output_html_with_http_headers $input, $cookie, $template->output;
+
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl index 1efb17ac64..9aed517b0e 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/admin-home.tmpl @@ -93,6 +93,10 @@ Additional parameters + + Stopwords + Words ignored during search + Printers Printers (UNIX paths). diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl new file mode 100644 index 0000000000..a857efaa06 --- /dev/null +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl @@ -0,0 +1,152 @@ + +Koha › System Administration + + + + + +

Koha › Stopword

+ +
+ + + + +

Modify word

+ +

Add word

+ +
" name="Aform" method="post"> + + +

+ + +

+ +

+ + +

+ +

+ +

+
+ + + + Data recorded +
" method="post"> + +
+ + + +

CONFIRM DELETION of

+
" method="post"> + + "> + +
+
" method="post"> + +
+ + + + Data deleted +
" method="post"> + +
+ + + +

Stop words admin

+ NOTE : if you change something in this table, ask your administrator to run misc/rebuildnonmarc.pl script. +
" method="post"> + " /> + +
+ + + You searched for + + + + + + + + + "> + + + + +
Word
?op=delete_confirm&searchfield=">Delete
+
" method="post"> + + + ?offset="><< Prev + + + ?offset=">Next >> + + +
+ +
+ + -- 2.39.5