IMPROVEMENT : reintroducing stopwords management

Zebra (and NoZebra) don't deal with stopwords.
They search for the exact string the user types.
This commit :
- reintroduces the stopword management script that was in 2.2
- adds stopword management to Zebra & NoZebra searches.

PS : fortunately, the stopwords table was not removed by updatedatabase, so there is nothing to do here.
(& the table is in kohastructure.sql)

Signed-off-by: Chris Cormack <crc@liblime.com>
This commit is contained in:
Paul POULAIN 2007-09-24 16:26:35 +02:00 committed by Chris Cormack
parent 72e5d4f62d
commit a3a4bfc5e6
4 changed files with 335 additions and 0 deletions

View file

@ -532,6 +532,7 @@ sub buildQuery {
}
);
# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
# we can't handle field weighting, stemming until a formal query parser is written
# I'll work on this soon -- JF
@ -562,6 +563,10 @@ sub buildQuery {
# if there was, we can apply the current operator
for ( my $i = 0 ; $i <= @operands ; $i++ ) {
my $operand = $operands[$i];
# remove stopwords from operand : parse all stopwords & remove them (case insensitive)
foreach (keys %{C4::Context->stopwords}) {
$operand=~ s/\b$_\b//i;
}
my $index = $indexes[$i];
my $stemmed_operand;
my $stemming = C4::Context->parameters("Stemming") || 0;
@ -1268,6 +1273,7 @@ sub NZanalyse {
my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?");
# split each word, query the DB and build the biblionumbers result
foreach (split / /,$string) {
next if C4::Context->stopwords->{uc($_)}; # skip if stopword
#warn "search on all indexes on $_";
my $biblionumbers;
next unless $_;

173
admin/stopwords.pl Executable file
View file

@ -0,0 +1,173 @@
#!/usr/bin/perl
#script to administer the stopwords table
#written 20/02/2002 by paul.poulain@free.fr
# This software is placed under the gnu General Public License, v2 (http://www.gnu.org/licenses/gpl.html)
# ALGO :
# this script uses $op to know what to do.
# if $op is empty or none of the values below,
# - the default screen is built (with all records, or filtered data).
# - the user can click on add or delete record.
# if $op=add_form
# - if searchfield is set, this is a modification of that word
# - builds the add/modify form
# if $op=add_validate
# - the user has just sent data, so we create/replace the record(s)
# if $op=delete_confirm
# - we show the word given in searchfield and ask for deletion confirmation
# if $op=delete_confirmed
# - we delete the word given in searchfield
# Copyright 2000-2002 Katipo Communications
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
# Suite 330, Boston, MA 02111-1307 USA
use strict;
use CGI;
use C4::Context;
use C4::Output;
use C4::Search;
use C4::Auth;
# StringSearch($env, $searchstring, $type)
#
# Looks up the stopwords that start with the first whitespace-separated token
# of $searchstring (SQL: word like 'token%'), ordered by word.  An empty or
# undefined $searchstring matches every stopword.
#
#   $env, $type   - accepted so existing callers keep working; not used here.
#   $searchstring - text typed by the user; only its first token is used.
#
# Returns ($count, \@rows): the number of matching rows and a reference to an
# array of the hashrefs produced by fetchrow_hashref (one 'word' column each).
sub StringSearch {
    my ($env, $searchstring, $type) = @_;
    my $dbh = C4::Context->dbh;

    # split ' ' skips leading whitespace; with empty input there is no token
    # at all, so fall back to an empty prefix (pattern becomes just '%').
    my ($prefix) = split(' ', defined $searchstring ? $searchstring : '');
    $prefix = '' unless defined $prefix;

    # The value travels through a placeholder, so the driver does the quoting.
    # Quotes must NOT be backslash-escaped by hand on top of that.
    my $sth = $dbh->prepare("Select word from stopwords where (word like ?) order by word");
    $sth->execute("$prefix%");

    my @results;
    while (my $row = $sth->fetchrow_hashref) {
        push @results, $row;
    }
    $sth->finish;

    return (scalar @results, \@results);
}
# ---- Request handling -------------------------------------------------------
# Reads the CGI parameters (searchfield, offset, op), obtains the template and
# logged-in user through get_template_and_user (flags required: parameters,
# management), then fills admin/stopwords.tmpl according to $op:
#   add_form         - show the add/modify form
#   add_validate     - store the submitted word(s)
#   delete_confirm   - ask for confirmation before deleting searchfield
#   delete_confirmed - delete searchfield
#   anything else    - paged list of stopwords, optionally filtered by searchfield
my $input = CGI->new;

# Absent parameters come back undef; normalise them so the string operations
# below never run on undef.
my $searchfield = $input->param('searchfield');
$searchfield = '' unless defined $searchfield;
$searchfield =~ s/\,//g;

my $op = $input->param('op');
$op = '' unless defined $op;

# Only a non-negative integer is a usable offset.  Anything else (missing,
# negative, garbage) restarts at the first page; a negative value would
# otherwise index the result array from its end.
my $offset = $input->param('offset');
$offset = 0 unless defined $offset && $offset =~ /\A\d+\z/;

my $script_name = "/cgi-bin/koha/admin/stopwords.pl";
my $pagesize    = 20;

my ($template, $loggedinuser, $cookie) = get_template_and_user({
    template_name   => "admin/stopwords.tmpl",
    query           => $input,
    type            => "intranet",
    flagsrequired   => {parameters => 1, management => 1},
    authnotrequired => 0,
    debug           => 1,
});

$template->param(
    script_name => $script_name,
    searchfield => $searchfield,
);

################## ADD_FORM ##################################
# Shows the form used to add or modify a word.
if ($op eq 'add_form') {
    # The form is rendered from searchfield alone (already passed to the
    # template above), so there is nothing to read from the database here.
    $template->param(add_form => 1);
}
################## ADD_VALIDATE ##################################
# called by add_form, used to insert/modify data in DB
elsif ($op eq 'add_validate') {
    $template->param(add_validate => 1);
    my $dbh = C4::Context->dbh;

    my $words = $input->param('word');
    $words = '' unless defined $words;

    my $sth = $dbh->prepare("replace stopwords (word) values (?)");
    foreach my $insert_value (split / |,/, $words) {
        # "A, B" or doubled spaces produce empty tokens: never store those.
        next if $insert_value eq '';
        # The add form upper-cases the field in the browser; do it here too so
        # the stored form does not depend on JavaScript having run.
        $sth->execute(uc $insert_value);
    }
    $sth->finish;
}
################## DELETE_CONFIRM ##################################
# called by default form, used to confirm deletion of data in DB
elsif ($op eq 'delete_confirm') {
    # The confirmation page only echoes searchfield; no lookup is needed.
    $template->param(delete_confirm => 1);
}
################## DELETE_CONFIRMED ##################################
# called by delete_confirm, used to effectively delete data in DB
elsif ($op eq 'delete_confirmed') {
    $template->param(delete_confirmed => 1);
    my $dbh = C4::Context->dbh;
    my $sth = $dbh->prepare("delete from stopwords where word=?");
    $sth->execute($searchfield);
    $sth->finish;
}
################## DEFAULT ##################################
# Paged list of the stopwords matching searchfield.
else {
    $template->param(else => 1);
    my ($count, $results) = StringSearch(undef, $searchfield, 'web');

    # Index one past the last row of the current page.
    my $page_end = $offset + $pagesize < $count ? $offset + $pagesize : $count;

    my @loop;
    my $toggle = 'white';
    for (my $i = $offset; $i < $page_end; $i++) {
        push @loop, { word => $results->[$i]{'word'}, toggle => $toggle };
        # alternate the row background colour
        $toggle = $toggle eq 'white' ? '#ffffcc' : 'white';
    }
    $template->param(loop => \@loop);

    if ($offset > 0) {
        # Never hand out a negative offset for the "Prev" link.
        my $prevpage = $offset - $pagesize;
        $prevpage = 0 if $prevpage < 0;
        $template->param(
            offsetgtzero => 1,
            prevpage     => $prevpage,
        );
    }
    if ($offset + $pagesize < $count) {
        $template->param(
            ltcount  => 1,
            nextpage => $offset + $pagesize,
        );
    }
}

$template->param(
    intranetcolorstylesheet => C4::Context->preference("intranetcolorstylesheet"),
    intranetstylesheet      => C4::Context->preference("intranetstylesheet"),
    IntranetNav             => C4::Context->preference("IntranetNav"),
);
output_html_with_http_headers($input, $cookie, $template->output);

View file

@ -93,6 +93,10 @@
<tr>
<th colspan="2">Additional parameters</th>
</tr>
<tr>
<td><a href="/cgi-bin/koha/admin/stopwords.pl">Stopwords</a></td>
<td>Words ignored during search</td>
</tr>
<tr>
<td><a href="/cgi-bin/koha/admin/printers.pl">Printers</a></td>
<td>Printers (UNIX paths).</td>

View file

@ -0,0 +1,152 @@
<!-- TMPL_INCLUDE NAME="doc-head-open.inc" -->
<title>Koha &rsaquo; System Administration</title>
<!-- TMPL_INCLUDE NAME="doc-head-close.inc" -->
<!-- TMPL_INCLUDE NAME="menus.inc" -->
<!-- TMPL_INCLUDE NAME="menu-admin.inc" -->
<h1>Koha &rsaquo; Stopword</h1>
<div id="mainbloc">
<!-- TMPL_IF NAME="add_form" -->
<script language="javascript" type="text/javascript">
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tells whether the form field f holds at least one character.
// The noalert argument is accepted but not used.
function isNotNull(f,noalert) {
    return f.value.length != 0;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Replaces the content of form field f with its upper-cased form.
// Always returns true.
function toUC(f) {
    f.value = f.value.toUpperCase();
    return true;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tells whether the content of form field v converts to a number.
// An empty field is rejected only when maybenull is 0.
function isNum(v,maybenull) {
    var notNumeric = isNaN(Number(v.value));
    var emptyButRequired = (maybenull == 0 && v.value == '');
    return !(notNumeric || emptyButRequired);
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tells whether the content of form field f can be parsed as a date by
// Date.parse.  Returns true or false (never undefined), like the other
// validators in this script.
function isDate(f) {
    var t = Date.parse(f.value);
    if (isNaN(t)) {
        return false;
    }
    return true;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Validates the add/modify form f before submitting it.
// The word field must contain something other than whitespace and commas;
// input made only of those separators would leave nothing to store.
// On success the form named Aform is submitted, otherwise an alert lists
// the problem(s) and nothing is sent.
function Check(f) {
    var problems = "";
    if (/^[\s,]*$/.test(f.word.value)) {
        problems += "\n- " + _("word missing");
    }
    if (problems.length == 0) {
        document.Aform.submit();
        return;
    }
    var message = _("Form not submitted because of the following problem(s)");
    message += "\n------------------------------------------------------------------------------------\n";
    message += problems;
    alert(message);
}
</script>
<!-- Add/modify form: with a searchfield the word is fixed and only re-submitted; without one the user types it. -->
<!-- TMPL_IF NAME="searchfield" -->
<h1>Modify word</h1>
<!-- TMPL_ELSE -->
<h1>Add word</h1>
<!-- /TMPL_IF -->
<form action="<!-- TMPL_VAR NAME="script_name" -->" name="Aform" method="post">
<input type="hidden" name="op" value="add_validate">
<!-- TMPL_IF NAME="searchfield" -->
<p>
<label>Word</label>
<label>
<input type="hidden" name="word" value="<!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" -->"><!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" -->
</label>
</p>
<!-- TMPL_ELSE -->
<p>
<label>Word</label>
<input type="text" name="word" size="80" maxlength="250" onblur="toUC(this)">
</p>
<!-- /TMPL_IF -->
<p>
<input type="button" value="OK" onclick="Check(this.form)" />
</p>
</form>
<!-- /TMPL_IF -->
<!-- TMPL_IF NAME="add_validate" -->
<!-- Acknowledgement displayed when the add_validate flag is set; the OK form posts back to the script without an op. -->
Data recorded
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="submit" value="OK" />
</form>
<!-- /TMPL_IF -->
<!-- TMPL_IF NAME="delete_confirm" -->
<!-- Deletion confirmation: YES re-posts the word with op=delete_confirmed, NO posts back to the script without an op. -->
<p>CONFIRM DELETION of <!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" --></p>
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="hidden" name="op" value="delete_confirmed">
<input type="hidden" name="searchfield" value="<!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" -->">
<input type="submit" value="YES">
</form>
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="submit" value="NO" />
</form>
<!-- /TMPL_IF -->
<!-- TMPL_IF NAME="delete_confirmed" -->
<!-- Acknowledgement displayed when the delete_confirmed flag is set; the OK form posts back to the script without an op. -->
Data deleted
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="submit" value="OK" />
</form>
<!-- /TMPL_IF -->
<!-- TMPL_IF NAME="else" -->
<!-- Default screen: filter box, one page of stopwords with a Delete link each, Prev/Next links and the Add button. -->
<h1>Stop words admin</h1>
<b>NOTE : if you change something in this table, ask your administrator to run misc/rebuildnonmarc.pl script.</b>
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="text" name="searchfield" value="<!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" -->" />
<input type="submit" name="ok" value="OK" />
</form>
<!-- TMPL_IF NAME="searchfield" -->
You searched for <b><!-- TMPL_VAR NAME="searchfield" ESCAPE="HTML" --></b>
<!-- /TMPL_IF -->
<table>
<tr>
<th>Word</th>
<th> </th>
</tr>
<!-- TMPL_LOOP NAME="loop" -->
<tr bgcolor="<!-- TMPL_VAR NAME="toggle" -->">
<td><!-- TMPL_VAR NAME="word" ESCAPE="HTML" --></td>
<td><a href="<!-- TMPL_VAR NAME="script_name" -->?op=delete_confirm&amp;searchfield=<!-- TMPL_VAR NAME="word" ESCAPE="URL" -->">Delete</a></td>
</tr>
<!-- /TMPL_LOOP -->
</table>
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
<input type="hidden" name="op" value="add_form">
<!-- The page links carry searchfield along so that paging keeps the current filter. -->
<!-- TMPL_IF NAME="offsetgtzero" -->
<a href="<!-- TMPL_VAR NAME="script_name" -->?offset=<!-- TMPL_VAR NAME="prevpage" -->&amp;searchfield=<!-- TMPL_VAR NAME="searchfield" ESCAPE="URL" -->">&lt;&lt; Prev</a>
<!-- /TMPL_IF -->
<!-- TMPL_IF NAME="ltcount" -->
<a href="<!-- TMPL_VAR NAME="script_name" -->?offset=<!-- TMPL_VAR NAME="nextpage" -->&amp;searchfield=<!-- TMPL_VAR NAME="searchfield" ESCAPE="URL" -->">Next &gt;&gt;</a>
<!-- /TMPL_IF -->
<input type="submit" value="Add Stop word" alt="Add Stop word" />
</form>
<!-- /TMPL_IF -->
</div>
<!-- TMPL_INCLUDE NAME="parameters-bottom.inc" -->