IMPROVEMENT : reintroducing stopwords management
Zebra (and NoZebra) don't deal with stopwords: they search for the exact string the user types. This commit: - reintroduces the stopword management script that was in 2.2 - adds stopword management to Zebra & NoZebra searches. PS: fortunately, the stopwords table was not removed by updatedatabase, so there is nothing to do here (and the table is in kohastructure.sql). Signed-off-by: Chris Cormack <crc@liblime.com>
This commit is contained in:
parent
72e5d4f62d
commit
a3a4bfc5e6
4 changed files with 335 additions and 0 deletions
|
@ -532,6 +532,7 @@ sub buildQuery {
|
|||
}
|
||||
);
|
||||
|
||||
|
||||
# STEP I: determine if this is a form-based / simple query or if it's complex (if complex,
|
||||
# we can't handle field weighting, stemming until a formal query parser is written
|
||||
# I'll work on this soon -- JF
|
||||
|
@ -562,6 +563,10 @@ sub buildQuery {
|
|||
# if there was, we can apply the current operator
|
||||
for ( my $i = 0 ; $i <= @operands ; $i++ ) {
|
||||
my $operand = $operands[$i];
|
||||
# remove stopwords from operand : parse all stopwords & remove them (case insensitive)
|
||||
foreach (keys %{C4::Context->stopwords}) {
|
||||
$operand=~ s/\b$_\b//i;
|
||||
}
|
||||
my $index = $indexes[$i];
|
||||
my $stemmed_operand;
|
||||
my $stemming = C4::Context->parameters("Stemming") || 0;
|
||||
|
@ -1268,6 +1273,7 @@ sub NZanalyse {
|
|||
my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?");
|
||||
# split each word, query the DB and build the biblionumbers result
|
||||
foreach (split / /,$string) {
|
||||
next if C4::Context->stopwords->{uc($_)}; # skip if stopword
|
||||
#warn "search on all indexes on $_";
|
||||
my $biblionumbers;
|
||||
next unless $_;
|
||||
|
|
173
admin/stopwords.pl
Executable file
173
admin/stopwords.pl
Executable file
|
@ -0,0 +1,173 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
#script to administer the stopwords table
|
||||
#written 20/02/2002 by paul.poulain@free.fr
|
||||
# This software is placed under the gnu General Public License, v2 (http://www.gnu.org/licenses/gpl.html)
|
||||
|
||||
# ALGO :
|
||||
# this script uses an $op parameter to know what to do.
|
||||
# if $op is empty or none of the above values,
|
||||
# - the default screen is built (with all records, or filtered data).
|
||||
# - the user can click on add, modify or delete record.
|
||||
# if $op=add_form
|
||||
# - if primkey exists, this is a modification,so we read the $primkey record
|
||||
# - builds the add/modify form
|
||||
# if $op=add_validate
|
||||
# - the user has just sent data, so we create/modify the record
|
||||
# if $op=delete_form
|
||||
# - we show the record having primkey=$primkey and ask for deletion validation form
|
||||
# if $op=delete_confirm
|
||||
# - we delete the record having primkey=$primkey
|
||||
|
||||
|
||||
# Copyright 2000-2002 Katipo Communications
|
||||
#
|
||||
# This file is part of Koha.
|
||||
#
|
||||
# Koha is free software; you can redistribute it and/or modify it under the
|
||||
# terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation; either version 2 of the License, or (at your option) any later
|
||||
# version.
|
||||
#
|
||||
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
|
||||
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
|
||||
# Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
use strict;
|
||||
use CGI;
|
||||
use C4::Context;
|
||||
use C4::Output;
|
||||
use C4::Search;
|
||||
use C4::Auth;
|
||||
|
||||
# StringSearch($env, $searchstring, $type)
#
# Looks up stopwords whose text starts with the first whitespace-separated
# token of $searchstring, ordered alphabetically.
#
# Parameters:
#   $env          - unused here; kept so existing callers keep working
#   $searchstring - user-supplied filter; only its first token is used as a
#                   prefix. Undefined or empty means "no filter" (all rows).
#   $type         - unused here; kept so existing callers keep working
#
# Returns a two-element list: (number of rows, arrayref of row hashrefs),
# each row hashref having a 'word' key.
sub StringSearch {
    my ($env, $searchstring, $type) = @_;
    my $dbh = C4::Context->dbh;

    # Only the first token is used as the prefix. No manual quote escaping is
    # done: the value is bound through a placeholder, so escaping it by hand
    # would put a literal backslash into the LIKE pattern.
    my ($prefix) = split ' ', ( defined $searchstring ? $searchstring : '' );
    $prefix = '' unless defined $prefix;

    my $sth = $dbh->prepare("Select word from stopwords where (word like ?) order by word");
    $sth->execute("$prefix%");

    my @results;
    while ( my $row = $sth->fetchrow_hashref ) {
        push @results, $row;
    }
    $sth->finish;

    return ( scalar @results, \@results );
}
|
||||
|
||||
# ---------------------------------------------------------------------------
# Main CGI flow.
#
# Reads the 'op', 'searchfield', 'offset' and 'word' CGI parameters and
# dispatches on 'op':
#   add_form         - show the add/modify form
#   add_validate     - store the submitted word(s)
#   delete_confirm   - show the deletion confirmation screen
#   delete_confirmed - delete the word
#   anything else    - show the (optionally filtered) paged list of stopwords
# ---------------------------------------------------------------------------
my $input       = CGI->new;
my $searchfield = $input->param('searchfield');
my $offset      = $input->param('offset');
my $script_name = "/cgi-bin/koha/admin/stopwords.pl";

my $pagesize = 20;
my $op       = $input->param('op');

# Absent CGI parameters come back undefined; normalise them so the string
# operations and comparisons below never act on undef.
$searchfield = '' unless defined $searchfield;
$searchfield =~ s/\,//g;
$op = '' unless defined $op;

# 'offset' is user-supplied and is used as an array index below, so accept
# only a non-negative integer; anything else starts at the first page.
$offset = ( defined $offset && $offset =~ /^(\d+)$/ ) ? $1 : 0;

my ( $template, $loggedinuser, $cookie ) = get_template_and_user(
    {
        template_name   => "admin/stopwords.tmpl",
        query           => $input,
        type            => "intranet",
        flagsrequired   => { parameters => 1, management => 1 },
        authnotrequired => 0,
        debug           => 1,
    }
);

$template->param(
    script_name => $script_name,
    searchfield => $searchfield
);

################## ADD_FORM ##################################
# Shows the form used to add a word. The template only needs 'searchfield'
# (already passed above), so no database read is required here.
if ( $op eq 'add_form' ) {
    $template->param( add_form => 1 );

    # END $OP eq ADD_FORM
################## ADD_VALIDATE ##################################
# called by add_form, used to insert/modify data in DB
}
elsif ( $op eq 'add_validate' ) {
    $template->param( add_validate => 1 );
    my $dbh = C4::Context->dbh;

    # Several words may be submitted at once, separated by spaces or commas.
    # Consecutive separators ("A, B") produce empty fields, which must not be
    # stored as stopwords.
    # NOTE(review): words are stored as typed; the form upper-cases them
    # client-side only - confirm whether the server should enforce that too.
    my $submitted = $input->param('word');
    $submitted = '' unless defined $submitted;
    my @words = grep { $_ ne '' } split / |,/, $submitted;

    my $sth = $dbh->prepare("replace stopwords (word) values (?)");
    foreach my $insert_value (@words) {
        $sth->execute($insert_value);
    }
    $sth->finish;

    # END $OP eq ADD_VALIDATE
################## DELETE_CONFIRM ##################################
# called by default form, used to confirm deletion of data in DB.
# The template only displays 'searchfield', so no database read is needed.
}
elsif ( $op eq 'delete_confirm' ) {
    $template->param( delete_confirm => 1 );

    # END $OP eq DELETE_CONFIRM
################## DELETE_CONFIRMED ##################################
# called by delete_confirm, used to effectively confirm deletion of data in DB
}
elsif ( $op eq 'delete_confirmed' ) {
    $template->param( delete_confirmed => 1 );
    my $dbh = C4::Context->dbh;
    my $sth = $dbh->prepare("delete from stopwords where word=?");
    $sth->execute($searchfield);
    $sth->finish;

    # END $OP eq DELETE_CONFIRMED
################## DEFAULT ##################################
}
else {    # DEFAULT
    $template->param( else => 1 );

    my $env;
    my ( $count, $results ) = StringSearch( $env, $searchfield, 'web' );

    # Index one past the last row of the current page.
    my $page_end = $offset + $pagesize < $count ? $offset + $pagesize : $count;

    my @loop;
    my $toggle = 'white';
    for my $i ( $offset .. $page_end - 1 ) {
        push @loop,
          {
            word   => $results->[$i]{'word'},
            toggle => $toggle,
          };

        # Alternate the row background colour.
        $toggle = $toggle eq 'white' ? '#ffffcc' : 'white';
    }
    $template->param( loop => \@loop );

    if ( $offset > 0 ) {
        # Never link to a negative offset when the current one is smaller
        # than a full page.
        my $prevpage = $offset > $pagesize ? $offset - $pagesize : 0;
        $template->param(
            offsetgtzero => 1,
            prevpage     => $prevpage
        );
    }
    if ( $offset + $pagesize < $count ) {
        $template->param(
            ltcount  => 1,
            nextpage => $offset + $pagesize
        );
    }
}

$template->param(
    intranetcolorstylesheet => C4::Context->preference("intranetcolorstylesheet"),
    intranetstylesheet      => C4::Context->preference("intranetstylesheet"),
    IntranetNav             => C4::Context->preference("IntranetNav"),
);
output_html_with_http_headers $input, $cookie, $template->output;
|
||||
|
|
@ -93,6 +93,10 @@
|
|||
<tr>
|
||||
<th colspan="2">Additional parameters</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="/cgi-bin/koha/admin/stopwords.pl">Stopwords</a></td>
|
||||
<td>Words ignored during search</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="/cgi-bin/koha/admin/printers.pl">Printers</a></td>
|
||||
<td>Printers (UNIX paths).</td>
|
||||
|
|
152
koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl
Normal file
152
koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tmpl
Normal file
|
@ -0,0 +1,152 @@
|
|||
<!-- TMPL_INCLUDE NAME="doc-head-open.inc" -->
|
||||
<title>Koha › System Administration</title>
|
||||
<!-- TMPL_INCLUDE NAME="doc-head-close.inc" -->
|
||||
|
||||
<!-- TMPL_INCLUDE NAME="menus.inc" -->
|
||||
<!-- TMPL_INCLUDE NAME="menu-admin.inc" -->
|
||||
|
||||
<h1>Koha › Stopword</h1>
|
||||
|
||||
<div id="mainbloc">
|
||||
<!-- TMPL_IF NAME="add_form" -->
|
||||
<script language="javascript" type="text/javascript">
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true when the form field f holds a non-empty value, false otherwise.
// The noalert argument is accepted but not used.
function isNotNull(f, noalert) {
    return f.value.length != 0;
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Upper-cases the value of form field f in place. Always returns true.
function toUC(f) {
    f.value = f.value.toUpperCase();
    return true;
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true when the value of form field v converts to a number.
// When maybenull is 0, an empty value is rejected as well.
function isNum(v, maybenull) {
    return !isNaN(Number(v.value)) && !(maybenull == 0 && v.value == '');
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns true when the value of form field f can be parsed by Date.parse,
// false otherwise. Both outcomes return an explicit boolean, so callers can
// compare the result against true as well as test it for truthiness.
function isDate(f) {
    var t = Date.parse(f.value);
    // Date.parse yields NaN for a string it cannot interpret as a date.
    return !isNaN(t);
}
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Validates the add-word form f before submission.
// If the "word" field is empty, an alert listing the problem is shown and
// nothing is submitted; otherwise document.Aform is submitted.
function Check(f) {
    var problems = "";
    if (f.word.value.length == 0) {
        problems += "\n- " + _("word missing");
    }

    if (problems.length != 0) {
        var message = _("Form not submitted because of the following problem(s)");
        message += "\n------------------------------------------------------------------------------------\n";
        message += problems;
        alert(message);
        return;
    }

    document.Aform.submit();
}
|
||||
</script>
|
||||
|
||||
<!-- TMPL_IF NAME="searchfield" -->
|
||||
<h1>Modify word</h1>
|
||||
<!-- TMPL_ELSE -->
|
||||
<h1>Add word</h1>
|
||||
<!-- /TMPL_IF -->
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" name="Aform" method="post">
|
||||
<input type="hidden" name="op" value="add_validate">
|
||||
<!-- TMPL_IF NAME="searchfield" -->
|
||||
<p>
|
||||
<label>Word</label>
|
||||
<label>
|
||||
<input type="hidden" name="word" value="<!-- TMPL_VAR NAME="searchfield" -->"><!-- TMPL_VAR NAME="searchfield" -->
|
||||
</label>
|
||||
</p>
|
||||
<!-- TMPL_ELSE -->
|
||||
<p>
|
||||
<label>Word</label>
|
||||
<input type="text" name="word" size="80" maxlength="250" onblur="toUC(this)">
|
||||
</p>
|
||||
<!-- /TMPL_IF -->
|
||||
<p>
|
||||
<input type="button" value="OK" onclick="Check(this.form)" />
|
||||
</p>
|
||||
</form>
|
||||
<!-- /TMPL_IF -->
|
||||
|
||||
<!-- TMPL_IF NAME="add_validate" -->
|
||||
Data recorded
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="submit" value="OK" />
|
||||
</form>
|
||||
<!-- /TMPL_IF -->
|
||||
|
||||
<!-- TMPL_IF NAME="delete_confirm" -->
|
||||
<p>CONFIRM DELETION of <!-- TMPL_VAR NAME="searchfield" --></p>
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="hidden" name="op" value="delete_confirmed">
|
||||
<input type="hidden" name="searchfield" value="<!-- TMPL_VAR NAME="searchfield" -->">
|
||||
<input type="submit" value="YES">
|
||||
</form>
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="submit" value="NO" />
|
||||
</form>
|
||||
<!-- /TMPL_IF -->
|
||||
|
||||
<!-- TMPL_IF NAME="delete_confirmed" -->
|
||||
Data deleted
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="submit" value="OK" />
|
||||
</form>
|
||||
<!-- /TMPL_IF -->
|
||||
|
||||
<!-- TMPL_IF NAME="else" -->
|
||||
<h1>Stop words admin</h1>
|
||||
<b>NOTE : if you change something in this table, ask your administrator to run misc/rebuildnonmarc.pl script.</b>
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="text" name="searchfield" value="<!-- TMPL_VAR NAME="searchfield" -->" />
|
||||
<input type="submit" name="ok" value="OK" />
|
||||
</form>
|
||||
|
||||
<!-- TMPL_IF NAME="searchfield" -->
|
||||
You searched for <b><!-- TMPL_VAR NAME="searchfield" --></b>
|
||||
<!-- /TMPL_IF -->
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Word</th>
|
||||
<th> </th>
|
||||
</tr>
|
||||
<!-- TMPL_LOOP NAME="loop" -->
|
||||
<tr bgcolor="<!-- TMPL_VAR NAME="toggle" -->">
|
||||
<td><!-- TMPL_VAR NAME="word" --></td>
|
||||
<td><a href="<!-- TMPL_VAR NAME="script_name" -->?op=delete_confirm&searchfield=<!-- TMPL_VAR NAME="word" -->">Delete</a></td>
|
||||
</tr>
|
||||
<!-- /TMPL_LOOP -->
|
||||
</table>
|
||||
<form action="<!-- TMPL_VAR NAME="script_name" -->" method="post">
|
||||
<input type="hidden" name="op" value="add_form">
|
||||
<!-- TMPL_IF NAME="offsetgtzero" -->
|
||||
<a href="<!-- TMPL_VAR NAME="script_name" -->?offset=<!-- TMPL_VAR NAME="prevpage" -->">&lt;&lt; Prev</a>
|
||||
<!-- /TMPL_IF -->
|
||||
<!-- TMPL_IF NAME="ltcount" -->
|
||||
<a href="<!-- TMPL_VAR NAME="script_name" -->?offset=<!-- TMPL_VAR NAME="nextpage" -->">Next &gt;&gt;</a>
|
||||
<!-- /TMPL_IF -->
|
||||
<input type="submit" value="Add Stop word" alt="Add Stop word" />
|
||||
</form>
|
||||
<!-- /TMPL_IF -->
|
||||
</div>
|
||||
|
||||
<!-- TMPL_INCLUDE NAME="parameters-bottom.inc" -->
|
Loading…
Reference in a new issue