Browse Source

Bug 9819 - 'stopwords'-related code removed

This patch removes code related to stopwords usage. The following methods are removed:

C4::Search::_remove_stopwords
C4::Context->stopwords
C4::Context->_new_stopwords

And the buildQuery API was changed (removed the $stopwords_removed return value).
A follow-up is provided for database changes, to make rebasing easier.

To test:
- Apply this patch
- Do some searches in both intranet and opac interfaces
- Nothing should break

Sponsored-by: Universidad Nacional de Córdoba
Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
new_12478_elasticsearch
Tomás Cohen Arazi 9 years ago
committed by Kyle M Hall
parent
commit
826326064e
  1. 46
      C4/Context.pm
  2. 58
      C4/Search.pm
  3. 1
      INSTALL.fedora7
  4. 96
      admin/stopwords.pl
  5. 5
      catalogue/search.pl
  6. 4
      cataloguing/addbooks.pl
  7. 99
      installer/data/mysql/de-DE/mandatory/stopwords.sql
  8. 1
      installer/data/mysql/de-DE/mandatory/stopwords.txt
  9. 99
      installer/data/mysql/en/mandatory/stopwords.sql
  10. 1
      installer/data/mysql/en/mandatory/stopwords.txt
  11. 71
      installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql
  12. 1
      installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt
  13. 194
      installer/data/mysql/it-IT/necessari/stopwords.sql
  14. 1
      installer/data/mysql/it-IT/necessari/stopwords.txt
  15. 26
      installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql
  16. 1
      installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt
  17. 99
      installer/data/mysql/pl-PL/mandatory/stopwords.sql
  18. 1
      installer/data/mysql/pl-PL/mandatory/stopwords.txt
  19. 21
      installer/data/mysql/ru-RU/mandatory/stopwords.sql
  20. 1
      installer/data/mysql/ru-RU/mandatory/stopwords.txt
  21. 29
      installer/data/mysql/uk-UA/mandatory/stopwords.sql
  22. 3
      koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc
  23. 28
      koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc
  24. 146
      koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt
  25. 1
      koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt
  26. 2
      misc/batchRebuildBiblioTables.pl
  27. 5
      opac/opac-search.pl
  28. 97
      t/db_dependent/Search.t
  29. 4
      test/search.pl

46
C4/Context.pm

@ -128,8 +128,6 @@ C4::Context - Maintain and manipulate the context of a Koha script
$Zconn = C4::Context->Zconn;
$stopwordhash = C4::Context->stopwords;
=head1 DESCRIPTION
When a Koha script runs, it makes use of a certain number of things:
@ -365,7 +363,6 @@ sub new {
return if !defined($self->{"config"});
$self->{"Zconn"} = undef; # Zebra Connections
$self->{"stopwords"} = undef; # stopwords list
$self->{"marcfromkohafield"} = undef; # the hash with relations between koha table fields and MARC field/subfield
$self->{"userenv"} = undef; # User env
$self->{"activeuser"} = undef; # current active user
@ -908,8 +905,6 @@ sub marcfromkohafield
}
# _new_marcfromkohafield
# Internal helper function (not a method!). This creates a new
# hash with stopwords
sub _new_marcfromkohafield
{
my $dbh = C4::Context->dbh;
@ -923,47 +918,6 @@ sub _new_marcfromkohafield
return $marcfromkohafield;
}
=head2 stopwords

  $stopwords = C4::Context->stopwords;

Returns a hash reference with stopwords.

This hash is cached for future use: if you call
C<C4::Context-E<gt>stopwords> twice, you will get the same hash without real DB access

=cut

sub stopwords
{
    # If the hash already exists on the current context, return it.
    return $context->{"stopwords"} if defined( $context->{"stopwords"} );

    # No hash yet. Create one and remember it for later calls.
    $context->{"stopwords"} = _new_stopwords();
    return $context->{"stopwords"};
}
# _new_stopwords
# Internal helper function (not a method!). This creates a new
# hash with stopwords: every word read from the "stopwords" table becomes
# a key whose value is the upper-cased word. Empty and NULL words are
# ignored. When no usable word is found, the hash falls back to the single
# entry A => "A".
sub _new_stopwords
{
    my $dbh = C4::Context->dbh;
    my $stopwordlist;
    my $sth = $dbh->prepare("select word from stopwords");
    $sth->execute;

    # The list assignment makes the loop condition depend on whether a row
    # was fetched, not on the truth of the word itself; a word such as "0"
    # or "" must not end the loop early.
    while ( my ($stopword) = $sth->fetchrow_array ) {
        next unless defined $stopword && length $stopword;
        $stopwordlist->{$stopword} = uc($stopword);
    }

    # Fallback used when nothing was loaded.
    $stopwordlist->{A} = "A" unless $stopwordlist;
    return $stopwordlist;
}
=head2 userenv
C4::Context->userenv;

58
C4/Search.pm

@ -911,32 +911,6 @@ sub pazGetRecords {
return ( undef, $results_hashref, \@facets_loop );
}
# STOPWORDS
# Strip stopwords from a search operand.
#
# Parameters:
#   $operand - the search string typed by the user
#   $index   - the index specification the operand is searched in
#
# Returns a two element list: the operand with every removed stopword (and
# the delimiters around it) replaced by a single space, and a reference to
# the array of stopwords that were removed.
sub _remove_stopwords {
    my ( $operand, $index ) = @_;
    my @stopwords_removed;

    # phrase and exact-qualified indexes shouldn't have stopwords removed
    if ( $index !~ m/,(phr|ext)/ ) {

        # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
        # we use IsAlpha unicode definition, to deal correctly with diacritics.
        # otherwise, a French word like "leçon" would be split into "le" "çon", "le"
        # is a stopword, we'd get "çon" and wouldn't find anything...
        foreach my $stopword ( keys %{ C4::Context->stopwords } ) {

            # don't remove operators; the test is anchored so that stopwords
            # which merely contain an operator ("for", "another") are still handled
            next if $stopword =~ /\A(?:and|or|not)\z/i;

            # Substitute in place with the delimited pattern itself, so only
            # stand-alone occurrences are touched and never the same letters
            # inside a longer word.
            if ( $operand =~
                s/(?:[^\X\p{isAlnum}]\Q$stopword\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$stopword\E$|^\Q$stopword\E[^\X\p{isAlnum}])/ /gi )
            {
                push @stopwords_removed, $stopword;
            }
        }
    }
    return ( $operand, \@stopwords_removed );
}
# TRUNCATION
sub _detect_truncation {
my ( $operand, $index ) = @_;
@ -1416,10 +1390,10 @@ sub parseQuery {
$simple_query, $query_cgi,
$query_desc, $limit,
$limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
$query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang);
Build queries and limits in CCL, CGI, Human,
handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
handle truncation, stemming, field weighting, fuzziness, etc.
See verbose embedded documentation.
@ -1445,7 +1419,6 @@ sub buildQuery {
my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
my $query = $operands[0];
my $simple_query = $operands[0];
@ -1458,8 +1431,6 @@ sub buildQuery {
my $limit_cgi;
my $limit_desc;
my $stopwords_removed; # flag to determine if stopwords have been removed
my $cclq = 0;
my $cclindexes = getIndexes();
if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) {
@ -1503,7 +1474,7 @@ sub buildQuery {
# return (
# undef, $query, $simple_query, $query_cgi,
# $query, $limit, $limit_cgi, $limit_desc,
# $stopwords_removed, 'ccl'
# 'ccl'
# );
# }
@ -1527,11 +1498,10 @@ sub buildQuery {
# A flag to determine whether or not to add the index to the query
my $indexes_set;
# If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
# If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling
if ( $operands[$i] =~ /\w(:|=)/ || $scan ) {
$weight_fields = 0;
$stemming = 0;
$remove_stopwords = 0;
} else {
$operands[$i] =~ s/\?/{?}/g; # need to escape question marks
}
@ -1550,7 +1520,7 @@ sub buildQuery {
#weight_fields/relevance search causes errors with date ranges
#In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range)
#In the case of YYYY-YYYY, it will return no results
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
}
# Date of Acquisition
@ -1561,15 +1531,14 @@ sub buildQuery {
#Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so
#irrelevant here
#remove_stopwords doesn't function anymore so is irrelevant
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
$stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0;
}
# ISBN,ISSN,Standard Number, don't need special treatment
elsif ( $index eq 'nb' || $index eq 'ns' ) {
(
$stemming, $auto_truncation,
$weight_fields, $fuzzy_enabled,
$remove_stopwords
) = ( 0, 0, 0, 0, 0 );
$weight_fields, $fuzzy_enabled
) = ( 0, 0, 0, 0 );
if ( $index eq 'nb' ) {
if ( C4::Context->preference("SearchWithISBNVariations") ) {
@ -1594,15 +1563,6 @@ sub buildQuery {
my $index_plus = $index . $struct_attr . ':';
my $index_plus_comma = $index . $struct_attr . ',';
# Remove Stopwords
if ($remove_stopwords) {
( $operand, $stopwords_removed ) =
_remove_stopwords( $operand, $index );
warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
warn "REMOVED STOPWORDS: @$stopwords_removed"
if ( $stopwords_removed && $DEBUG );
}
if ($auto_truncation){
unless ( $index =~ /,(st-|phr|ext)/ ) {
#FIXME only valid with LTR scripts
@ -1789,7 +1749,7 @@ sub buildQuery {
return (
undef, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type
$query_type
);
}

1
INSTALL.fedora7

@ -1183,7 +1183,6 @@ MySQL> show tables;
| sessions |
| special_holidays |
| statistics |
| stopwords |
| subscription |
| subscriptionhistory |
| subscriptionroutinglist |

96
admin/stopwords.pl

@ -1,96 +0,0 @@
#!/usr/bin/perl
#script to administer the stopwords table
#written 20/02/2002 by paul.poulain@free.fr
# This software is placed under the gnu General Public License, v2 (http://www.gnu.org/licenses/gpl.html)
# Copyright 2000-2002 Katipo Communications
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Koha is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Koha; if not, see <http://www.gnu.org/licenses>.
use strict;
use warnings;
use CGI qw ( -utf8 );
use C4::Context;
use C4::Output;
use C4::Auth;
# Fetch the stop words whose text starts with the given prefix.
# A missing (or false) prefix matches every row. The result is what
# fetchall_arrayref({}) gives back for the query, ordered by word.
sub StringSearch {
    my ($prefix) = @_;
    my $like = ( $prefix || '' ) . "%";
    my $dbh  = C4::Context->dbh;
    my $query = $dbh->prepare("
SELECT word FROM stopwords WHERE (word LIKE ?) ORDER BY word
");
    $query->execute($like);
    return $query->fetchall_arrayref({});
}
# Request handling for the stop word administration page. The "op" CGI
# parameter selects the action: show the add form, store new words, ask for
# or perform a deletion, or (by default) list the stop words page by page.
my $input       = CGI->new;
my $searchfield = $input->param('searchfield');
my $offset      = $input->param('offset') || 0;
my $script_name = "/cgi-bin/koha/admin/stopwords.pl";
my $pagesize    = 20;
my $op          = $input->param('op') || '';

# The offset is used as an array index below; anything that is not a plain
# non-negative integer falls back to the first page.
$offset = 0 unless $offset =~ /\A\d+\z/;

my ($template, $loggedinuser, $cookie)
    = get_template_and_user({template_name => "admin/stopwords.tt",
        query => $input,
        type => "intranet",
        flagsrequired => {parameters => 'parameters_remaining_permissions'},
        authnotrequired => 0,
        debug => 1,
    });

$template->param(script_name => $script_name,
                 searchfield => $searchfield);

my $dbh = C4::Context->dbh;
if ($op eq 'add_form') {
    $template->param(add_form => 1);
} elsif ($op eq 'add_validate') {
    $template->param(add_validate => 1);
    my $words = $input->param('word');
    $words = '' unless defined $words;

    # Words are separated by a space or a comma. Adjacent separators
    # ("a, b") produce empty fields, which must not be stored as stop words.
    my @tab = grep { length } split / |,/, $words;
    my $sth = $dbh->prepare("INSERT INTO stopwords (word) VALUES (?)");
    foreach my $insert_value (@tab) {
        $sth->execute($insert_value);
    }
} elsif ($op eq 'delete_confirm') {
    $template->param(delete_confirm => 1);
} elsif ($op eq 'delete_confirmed') {
    $template->param(delete_confirmed => 1);
    my $sth = $dbh->prepare("delete from stopwords where word=?");
    $sth->execute($searchfield);
} else { # DEFAULT
    $template->param(else => 1);
    my $results = StringSearch($searchfield);
    my $count   = scalar(@$results);

    # FIXME: limit and offset should get to the SQL query
    # Index one past the last row of the current page.
    my $page_end = $offset + $pagesize < $count ? $offset + $pagesize : $count;
    my @loop;
    for my $i ( $offset .. $page_end - 1 ) {
        push @loop, { word => $results->[$i]{'word'} };
    }
    $template->param(loop => \@loop);
    if ($offset > 0) {
        $template->param(offsetgtzero => 1,
                         prevpage => $offset-$pagesize);
    }
    if ($offset+$pagesize < $count) {
        $template->param(ltcount => 1,
                         nextpage => $offset+$pagesize);
    }
}
output_html_with_http_headers $input, $cookie, $template->output;

5
catalogue/search.pl

@ -481,10 +481,10 @@ my $hits;
my $expanded_facet = $params->{'expand'};
# Define some global variables
my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type);
my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type);
## I. BUILD THE QUERY
( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan,$lang);
( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan,$lang);
## parse the query_cgi string and put it into a form suitable for <input>s
my @query_inputs;
@ -611,7 +611,6 @@ for (my $i=0;$i<@servers;$i++) {
if ($query_desc || $limit_desc) {
$template->param(searchdesc => 1);
}
$template->param(stopwords_removed => "@$stopwords_removed") if $stopwords_removed;
$template->param(results_per_page => $results_per_page);
# must define a value for size if not present in DB
# in order to avoid problems generated by the default size value in TT

4
cataloguing/addbooks.pl

@ -77,8 +77,8 @@ if ($query) {
if ($QParser) {
$builtquery = $query;
} else {
my ( $builterror,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type);
( $builterror,$builtquery,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(undef,\@operands);
my ( $builterror,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type);
( $builterror,$builtquery,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(undef,\@operands);
}
# find results

99
installer/data/mysql/de-DE/mandatory/stopwords.sql

@ -1,99 +0,0 @@
INSERT INTO stopwords VALUES
('a'),
('about'),
('also'),
('an'),
('and'),
('another'),
('any'),
('are'),
('as'),
('at'),
('back'),
('be'),
('because'),
('been'),
('being'),
('but'),
('by'),
('can'),
('could'),
('did'),
('do'),
('each'),
('end'),
('even'),
('for'),
('from'),
('get'),
('go'),
('had'),
('have'),
('he'),
('her'),
('here'),
('his'),
('how'),
('i'),
('if'),
('in'),
('into'),
('is'),
('it'),
('just'),
('may'),
('me'),
('might'),
('much'),
('must'),
('my'),
('no'),
('not'),
('of'),
('off'),
('on'),
('only'),
('or'),
('other'),
('our'),
('out'),
('should'),
('so'),
('some'),
('still'),
('such'),
('than'),
('that'),
('the'),
('their'),
('them'),
('then'),
('there'),
('these'),
('they'),
('this'),
('those'),
('to'),
('too'),
('try'),
('two'),
('under'),
('up'),
('us'),
('was'),
('we'),
('were'),
('what'),
('when'),
('where'),
('which'),
('while'),
('who'),
('why'),
('will'),
('with'),
('within'),
('without'),
('would'),
('you'),
('your');

1
installer/data/mysql/de-DE/mandatory/stopwords.txt

@ -1 +0,0 @@
Englische Stoppwortliste. Sie können diese nach der Installation ändern.

99
installer/data/mysql/en/mandatory/stopwords.sql

@ -1,99 +0,0 @@
INSERT INTO stopwords VALUES
('a'),
('about'),
('also'),
('an'),
('and'),
('another'),
('any'),
('are'),
('as'),
('at'),
('back'),
('be'),
('because'),
('been'),
('being'),
('but'),
('by'),
('can'),
('could'),
('did'),
('do'),
('each'),
('end'),
('even'),
('for'),
('from'),
('get'),
('go'),
('had'),
('have'),
('he'),
('her'),
('here'),
('his'),
('how'),
('i'),
('if'),
('in'),
('into'),
('is'),
('it'),
('just'),
('may'),
('me'),
('might'),
('much'),
('must'),
('my'),
('no'),
('not'),
('of'),
('off'),
('on'),
('only'),
('or'),
('other'),
('our'),
('out'),
('should'),
('so'),
('some'),
('still'),
('such'),
('than'),
('that'),
('the'),
('their'),
('them'),
('then'),
('there'),
('these'),
('they'),
('this'),
('those'),
('to'),
('too'),
('try'),
('two'),
('under'),
('up'),
('us'),
('was'),
('we'),
('were'),
('what'),
('when'),
('where'),
('which'),
('while'),
('who'),
('why'),
('will'),
('with'),
('within'),
('without'),
('would'),
('you'),
('your');

1
installer/data/mysql/en/mandatory/stopwords.txt

@ -1 +0,0 @@
English stop words. You can change this after installation.

71
installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql

@ -1,71 +0,0 @@
# phpMyAdmin MySQL-Dump
# version 2.2.6-rc1
# http://phpwizard.net/phpMyAdmin/
# http://phpmyadmin.sourceforge.net/ (download page)
#
# Host: localhost
# Generation Time: Nov 22, 2002 at 11:10 AM
# Server version: 3.23.52
# PHP Version: 4.2.3
# Database : `koha_fr`
#
# Dumping data for table `stopwords`
#
INSERT INTO stopwords VALUES ('AU');
INSERT INTO stopwords VALUES ('ÇA');
INSERT INTO stopwords VALUES ('CAR');
INSERT INTO stopwords VALUES ('CE');
INSERT INTO stopwords VALUES ('CELA');
INSERT INTO stopwords VALUES ('CES');
INSERT INTO stopwords VALUES ('CEUX');
INSERT INTO stopwords VALUES ('CI');
INSERT INTO stopwords VALUES ('DANS');
INSERT INTO stopwords VALUES ('DE');
INSERT INTO stopwords VALUES ('DES');
INSERT INTO stopwords VALUES ('DU');
INSERT INTO stopwords VALUES ('ELLE');
INSERT INTO stopwords VALUES ('ELLES');
INSERT INTO stopwords VALUES ('EN');
INSERT INTO stopwords VALUES ('EST');
INSERT INTO stopwords VALUES ('ET');
INSERT INTO stopwords VALUES ('EU');
INSERT INTO stopwords VALUES ('IL');
INSERT INTO stopwords VALUES ('ILS');
INSERT INTO stopwords VALUES ('JE');
INSERT INTO stopwords VALUES ('LA');
INSERT INTO stopwords VALUES ('LE');
INSERT INTO stopwords VALUES ('LES');
INSERT INTO stopwords VALUES ('LEUR');
INSERT INTO stopwords VALUES ('MA');
INSERT INTO stopwords VALUES ('MAIS');
INSERT INTO stopwords VALUES ('MES');
INSERT INTO stopwords VALUES ('MON');
INSERT INTO stopwords VALUES ('NI');
INSERT INTO stopwords VALUES ('NOTRE');
INSERT INTO stopwords VALUES ('NOUS');
INSERT INTO stopwords VALUES ('OU');
INSERT INTO stopwords VALUES ('PAR');
INSERT INTO stopwords VALUES ('PAS');
INSERT INTO stopwords VALUES ('PEU');
INSERT INTO stopwords VALUES ('PEUT');
INSERT INTO stopwords VALUES ('POUR');
INSERT INTO stopwords VALUES ('QUE');
INSERT INTO stopwords VALUES ('QUI');
INSERT INTO stopwords VALUES ('SA');
INSERT INTO stopwords VALUES ('SES');
INSERT INTO stopwords VALUES ('SI');
INSERT INTO stopwords VALUES ('SIEN');
INSERT INTO stopwords VALUES ('SON');
INSERT INTO stopwords VALUES ('SOUS');
INSERT INTO stopwords VALUES ('SUR');
INSERT INTO stopwords VALUES ('TA');
INSERT INTO stopwords VALUES ('TELS');
INSERT INTO stopwords VALUES ('TES');
INSERT INTO stopwords VALUES ('TON');
INSERT INTO stopwords VALUES ('TU');
INSERT INTO stopwords VALUES ('VOTRE');
INSERT INTO stopwords VALUES ('VOUS');
INSERT INTO stopwords VALUES ('VU');

1
installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt

@ -1 +0,0 @@
Mots vides de la langue française.

194
installer/data/mysql/it-IT/necessari/stopwords.sql

@ -1,194 +0,0 @@
SET FOREIGN_KEY_CHECKS=0;
INSERT INTO `stopwords` (`word`) VALUES
('a'),
('about'),
('ad'),
('after'),
('ai'),
('al'),
('all'),
('alla'),
('alle'),
('allo'),
('also'),
('an'),
('and'),
('another'),
('any'),
('are'),
('as'),
('at'),
('b'),
('back'),
('be'),
('because'),
('been'),
('being'),
('but'),
('by'),
('c'),
('can'),
('ci'),
('col'),
('con'),
('could'),
('d'),
('da'),
('dagli'),
('dai'),
('dal'),
('dall'),
('dalla'),
('dalle'),
('dallo'),
('de'),
('degli'),
('dei'),
('del'),
('dell'),
('della'),
('delle'),
('dello'),
('di'),
('did'),
('do'),
('e'),
('each'),
('ed'),
('end'),
('et'),
('even'),
('f'),
('for'),
('fra'),
('from'),
('g'),
('get'),
('gli'),
('go'),
('h'),
('had'),
('have'),
('he'),
('her'),
('here'),
('his'),
('how'),
('however'),
('i'),
('if'),
('il'),
('in'),
('into'),
('is'),
('it'),
('j'),
('just'),
('k'),
('l'),
('la'),
('le'),
('lo'),
('m'),
('may'),
('me'),
('mi'),
('might'),
('more'),
('much'),
('must'),
('my'),
('n'),
('ne'),
('negli'),
('nel'),
('nell'),
('nella'),
('nello'),
('no'),
('non'),
('not'),
('o'),
('of'),
('off'),
('on'),
('only'),
('oppure'),
('or'),
('other'),
('our'),
('out'),
('over'),
('p'),
('per'),
('q'),
('r'),
('s'),
('saw'),
('si'),
('since'),
('should'),
('so'),
('some'),
('still'),
('su'),
('such'),
('sugli'),
('sui'),
('sul'),
('sull'),
('sulla'),
('sulle'),
('t'),
('te'),
('than'),
('that'),
('the'),
('their'),
('them'),
('then'),
('there'),
('these'),
('they'),
('this'),
('those'),
('ti'),
('to'),
('too'),
('tra'),
('try'),
('two'),
('u'),
('un'),
('una'),
('under'),
('uno'),
('up'),
('upon'),
('us'),
('v'),
('vi'),
('was'),
('we'),
('were'),
('what'),
('when'),
('where'),
('whether'),
('which'),
('while'),
('who'),
('why'),
('will'),
('with'),
('within'),
('without'),
('would'),
('x'),
('y'),
('you'),
('your'),
('z');
SET FOREIGN_KEY_CHECKS=1;

1
installer/data/mysql/it-IT/necessari/stopwords.txt

@ -1 +0,0 @@
Stopword. Possono essere modificate dopo l'installazione.

26
installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql

@ -1,26 +0,0 @@
--
-- Default classification sources and filing rules
-- for Koha.
--
-- Copyright (C) 2011 Magnus Enger Libriotech
--
-- This file is part of Koha.
--
-- Koha is free software; you can redistribute it and/or modify it under the
-- terms of the GNU General Public License as published by the Free Software
-- Foundation; either version 2 of the License, or (at your option) any later
-- version.
--
-- Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-- A PARTICULAR PURPOSE. See the GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License along
-- with Koha; if not, write to the Free Software Foundation, Inc.,
-- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
INSERT INTO stopwords VALUES
('eller'),
('en'),
('og'),
('som');

1
installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt

@ -1 +0,0 @@
Norske stoppord. Du kan endre disse etter at installasjonen er fullført. (NB! Vil ikke bli benyttet dersom du velger Zebra for indeksering.)

99
installer/data/mysql/pl-PL/mandatory/stopwords.sql

@ -1,99 +0,0 @@
INSERT INTO stopwords VALUES
('a'),
('about'),
('also'),
('an'),
('and'),
('another'),
('any'),
('are'),
('as'),
('at'),
('back'),
('be'),
('because'),
('been'),
('being'),
('but'),
('by'),
('can'),
('could'),
('did'),
('do'),
('each'),
('end'),
('even'),
('for'),
('from'),
('get'),
('go'),
('had'),
('have'),
('he'),
('her'),
('here'),
('his'),
('how'),
('i'),
('if'),
('in'),
('into'),
('is'),
('it'),
('just'),
('may'),
('me'),
('might'),
('much'),
('must'),
('my'),
('no'),
('not'),
('of'),
('off'),
('on'),
('only'),
('or'),
('other'),
('our'),
('out'),
('should'),
('so'),
('some'),
('still'),
('such'),
('than'),
('that'),
('the'),
('their'),
('them'),
('then'),
('there'),
('these'),
('they'),
('this'),
('those'),
('to'),
('too'),
('try'),
('two'),
('under'),
('up'),
('us'),
('was'),
('we'),
('were'),
('what'),
('when'),
('where'),
('which'),
('while'),
('who'),
('why'),
('will'),
('with'),
('within'),
('without'),
('would'),
('you'),
('your');

1
installer/data/mysql/pl-PL/mandatory/stopwords.txt

@ -1 +0,0 @@
Angielskie stop words. Możesz je zmienić po instalacji.

21
installer/data/mysql/ru-RU/mandatory/stopwords.sql

@ -1,21 +0,0 @@
TRUNCATE stopwords;
INSERT INTO stopwords VALUES
( 'к'),
( 'и'),
( 'в'),
( 'на'),
( 'да'),
( 'то'),
( 'где'),
( 'еле'),
( 'это'),
( 'что'),
( 'ведь'),
( 'даже'),
( 'почти'),
( 'такой'),
( 'также'),
( 'значит'),
( 'немного'),
( 'который');

1
installer/data/mysql/ru-RU/mandatory/stopwords.txt

@ -1 +0,0 @@
Несущественные для поиска русские слова. Вы можете корректировать их после установки.

29
installer/data/mysql/uk-UA/mandatory/stopwords.sql

@ -1,29 +0,0 @@
TRUNCATE stopwords;
INSERT INTO stopwords VALUES
('адже'),
('авжеж'),
('в'),
('де'),
('дещо'),
('до'),
('й'),
('ледве'),
('майже'),
('на'),
('навіть'),
('отже'),
('отож'),
('під'),
('так'),
('такий'),
('також'),
('те'),
('тобто'),
('тож'),
('тощо'),
('у'),
('це'),
('що'),
('як'),
('який');

3
koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc

@ -1,5 +1,5 @@
<div class="gradient">
<h1 id="logo"><a href="/cgi-bin/koha/mainpage.pl">[% LibraryName %]</a></h1><!-- Begin Stopwords Resident Search Box -->
<h1 id="logo"><a href="/cgi-bin/koha/mainpage.pl">[% LibraryName %]</a></h1>
<div id="header_search">
<div id="syspref_search" class="residentsearch">
<p class="tip">System preference search:</p>
@ -27,4 +27,3 @@
</ul>
</div>
</div>
<!-- End Stopwords Resident Search Box -->

28
koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc

@ -1,28 +0,0 @@
<div class="gradient">
<h1 id="logo"><a href="/cgi-bin/koha/mainpage.pl">[% LibraryName %]</a></h1><!-- Begin Stopwords Resident Search Box -->
<div id="header_search">
<div id="stopword_search" class="residentsearch">
<p class="tip">Stop word search:</p>
<form action="[% script_name %]" method="post">
<input class="head-searchbox" type="text" size="40" name="searchfield" value="[% searchfield %]" />
<input type="submit" name="ok" class="submit" value="Search" />
</form>
</div>
[% INCLUDE 'patron-search-box.inc' %]
[% IF ( CAN_user_catalogue ) %]
<div id="catalog_search" class="residentsearch">
<p class="tip">Enter search keywords:</p>
<form action="/cgi-bin/koha/catalogue/search.pl" method="get" id="cat-search-block">
<input type="text" name="q" id="search-form" size="40" value="" title="Enter the terms you wish to search for." class="head-searchbox form-text" />
<input type="submit" value="Submit" class="submit" />
</form>
</div>
[% END %]
<ul>
<li><a onclick="keep_text(0)" href="#stopword_search">Search stop words</a></li>
[% IF ( CAN_user_circulate ) %]<li><a onclick="keep_text(1)" href="#circ_search">Check out</a></li>[% END %]
[% IF ( CAN_user_catalogue ) %]<li><a onclick="keep_text(2)" href="#catalog_search">Search the catalog</a></li>[% END %]
</ul>
</div>
</div><!-- /gradient -->
<!-- End Stopwords Resident Search Box -->

146
koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt

@ -1,146 +0,0 @@
[% INCLUDE 'doc-head-open.inc' %]
<title>Koha &rsaquo; System administration &rsaquo; Stop words
[% IF ( add_form ) %]&rsaquo; [% IF ( searchfield ) %]Modify[% ELSE %]New[% END %] stop word
[% ELSIF ( add_validate ) %]&rsaquo; Data recorded
[% ELSIF ( delete_confirm ) %]&rsaquo; Delete stop word '[% searchfield %]' ?
[% ELSIF ( delete_confirmed ) %]&rsaquo; Data deleted
[% END %]
</title>
[% INCLUDE 'doc-head-close.inc' %]
<script type="text/javascript">
//<![CDATA[
$(document).ready(function() {
new YAHOO.widget.Button("newstopword");
});
function Check(f) {
if (f.word.value.length==0) {
alert(_("Form not submitted: word missing"));
} else {
document.Aform.submit();
}
}
//]]>
</script>
</head>
<body id="admin_stopwords" class="admin">
[% INCLUDE 'header.inc' %]
[% INCLUDE 'stopwords-admin-search.inc' %]
<div id="breadcrumbs">
<a href="/cgi-bin/koha/mainpage.pl">Home</a>
&rsaquo; <a href="/cgi-bin/koha/admin/admin-home.pl">Administration</a>
&rsaquo; <a href="/cgi-bin/koha/admin/stopwords.pl">Stop words</a>
[% IF ( add_form ) %]
&rsaquo; [% IF ( searchfield ) %]Modify[% ELSE %]New[% END %] Stop word
[% ELSIF ( add_validate ) %]
&rsaquo; Data recorded
[% ELSIF ( delete_confirm ) %]
&rsaquo; Delete stop word '[% searchfield %]' ?
[% ELSIF ( delete_confirmed ) %]
&rsaquo; Data deleted
[% END %]
</div>
<div id="doc3" class="yui-t2">
<div id="bd">
<div id="yui-main">
<div class="yui-b">
[% IF ( add_form ) %]
[% IF ( searchfield ) %]
<h1>Modify word</h1>
[% ELSE %]
<h1>New word</h1>
[% END %]
<form action="[% script_name %]" name="Aform" method="post">
<input type="hidden" name="op" value="add_validate" />
<fieldset class="rows">
<ol><li>
[% IF ( searchfield ) %]
<span class="label">Word</span>
<input type="hidden" name="word" value="[% searchfield %]" />[% searchfield %]
[% ELSE %]
<label for="word">Word</label>
<input type="text" name="word" id="word" size="50" maxlength="250" onblur="toUC(this)" />
[% END %]
</li>
</ol>
</fieldset>
<fieldset class="action">
<input type="button" value="Save" onclick="Check(this.form)" />
<a class="cancel" href="/cgi-bin/koha/admin/stopwords.pl">Cancel</a>
</fieldset>
</form>
[% END %]
[% IF ( add_validate ) %]
<div class="dialog message"> <h3>Data recorded</h3>
<form action="[% script_name %]" method="post">
<input type="submit" value="OK" class="approve" />
</form></div>
[% END %]
[% IF ( delete_confirm ) %]
<div class="dialog alert">
<h3>Delete stop word <span class="ex">'[% searchfield %]'</span></h3>
<form action="[% script_name %]" method="post">
<input type="hidden" name="op" value="delete_confirmed" />
<input type="hidden" name="searchfield" value="[% searchfield %]" />
<input type="submit" value="Yes, delete" class="approve" />
</form>
<form action="[% script_name %]" method="get">
<input type="submit" class="deny" value="No, do not delete" />
</form></div>
[% END %]
[% IF ( delete_confirmed ) %]
<div class="dialog message"> <h3>Data deleted</h3>
<form action="[% script_name %]" method="post">
<input type="submit" value="OK" class="approve" />
</form></div>
[% END %]
[% IF ( else ) %]
<div id="toolbar">
<ul class="toolbar">
<li><a id="newstopword" href="/cgi-bin/koha/admin/stopwords.pl?op=add_form">New stop word</a></li>
</ul></div>
<h1>Stop words</h1>
<p class="message">NOTE : if you change something in this table, ask your administrator to run misc/batchRebuildBiblioTables.pl script.</p>
[% IF ( searchfield ) %]
<p>You searched for <b>[% searchfield %]</b></p>
[% END %]
<table>
<tr><th>Word</th>
<th></th>
</tr>
[% FOREACH loo IN loop %]
<tr>
<td>[% loo.word %]</td>
<td><a href="[% loo.script_name %]?op=delete_confirm&amp;searchfield=[% loo.word %]">Delete</a></td>
</tr>
[% END %]
</table>
<div class="pages">
[% IF ( offsetgtzero ) %]
<a href="[% script_name %]?offset=[% prevpage %]">&lt;&lt; Previous</a>
[% END %]
[% IF ( ltcount ) %]
<a href="[% script_name %]?offset=[% nextpage %]">Next &gt;&gt;</a>
[% END %]
</div>
[% END %]
</div>
</div>
<div class="yui-b">
[% INCLUDE 'admin-menu.inc' %]
</div>
</div>
[% INCLUDE 'intranet-bottom.inc' %]

1
koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt

@ -368,7 +368,6 @@ var holdForPatron = function () {
[% IF ( CAN_user_editcatalogue_edit_catalogue ) %] <div class="btn-group"><a class="btn btn-mini" id="z3950submit" href="#"><i class="fa fa-search"></i> Z39.50/SRU search</a></div>[% END %]
</div>
</div>
[% IF ( stopwords_removed ) %]<div><p class="tip">Ignored the following common words: "[% stopwords_removed %]"<p></div>[% END %]
[% ELSE %]
<div id="searchheader">
<form method="post" name="fz3950" class="fz3950bigrpad">

2
misc/batchRebuildBiblioTables.pl

@ -53,7 +53,7 @@ $starttime = gettimeofday;
#1st of all, find item MARC tag.
my ($tagfield,$tagsubfield) = &GetMarcFromKohaField("items.itemnumber",'');
# $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write");
# $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write");
my $sth = $dbh->prepare("SELECT biblionumber FROM biblio");
$sth->execute;
# my ($biblionumbermax) = $sth->fetchrow;

5
opac/opac-search.pl

@ -517,12 +517,12 @@ my $hits;
my $expanded_facet = $params->{'expand'};
# Define some global variables
my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type);
my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type);
my @results;
## I. BUILD THE QUERY
( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang);
( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang);
sub _input_cgi_parse {
my @elements;
@ -777,7 +777,6 @@ for (my $i=0;$i<@servers;$i++) {
if ($query_desc || $limit_desc) {
$template->param(searchdesc => 1);
}
$template->param(stopwords_removed => "@$stopwords_removed") if $stopwords_removed;
$template->param(results_per_page => $results_per_page);
my $hide = C4::Context->preference('OpacHiddenItems');
$hide = ($hide =~ m/\S/) if $hide; # Just in case it has some spaces/new lines

97
t/db_dependent/Search.t

@ -92,7 +92,6 @@ our $QueryStemming = 0;
our $QueryAutoTruncate = 0;
our $QueryWeightFields = 0;
our $QueryFuzzy = 0;
our $QueryRemoveStopwords = 0;
our $UseQueryParser = 0;
our $marcflavour = 'MARC21';
our $contextmodule = new Test::MockModule('C4::Context');
@ -108,8 +107,6 @@ $contextmodule->mock('preference', sub {
return $QueryWeightFields;
} elsif ($pref eq 'QueryFuzzy') {
return $QueryFuzzy;
} elsif ($pref eq 'QueryRemoveStopwords') {
return $QueryRemoveStopwords;
} elsif ($pref eq 'UseQueryParser') {
return $UseQueryParser;
} elsif ($pref eq 'maxRecordsForFacets') {
@ -212,22 +209,9 @@ sub run_marc21_search_tests {
$QueryAutoTruncate = 0;
$QueryWeightFields = 0;
$QueryFuzzy = 0;
$QueryRemoveStopwords = 0;
$UseQueryParser = 0;
$marcflavour = 'MARC21';
foreach my $string ("Leçon","modèles") {
my @results=C4::Search::_remove_stopwords($string,"kw");
$debug && warn "$string ",Dump(@results);
ok($results[0] eq $string,"$string is not modified");
}
foreach my $string ("A book about the stars") {
my @results=C4::Search::_remove_stopwords($string,"kw");
$debug && warn "$string ",Dump(@results);
ok($results[0] ne $string,"$results[0] from $string");
}
my $indexes = C4::Search::getIndexes();
is(scalar(grep(/^ti$/, @$indexes)), 1, "Title index supported");
@ -434,10 +418,10 @@ if ( $indexing_mode eq 'dom' ) {
my ( $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type );
$query_type );
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([], [ 'salud' ], [], [], [], 0, 'en');
$query_type ) = buildQuery([], [ 'salud' ], [], [], [], 0, 'en');
like($query, qr/kw\W.*salud/, "Built CCL keyword query");
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
@ -449,7 +433,7 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([ 'and' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
$query_type ) = buildQuery([ 'and' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
like($query, qr/kw\W.*salud\W.*and.*kw\W.*higiene/, "Built composed explicit-and CCL keyword query");
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
@ -457,7 +441,7 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([ 'or' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
$query_type ) = buildQuery([ 'or' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
like($query, qr/kw\W.*salud\W.*or.*kw\W.*higiene/, "Built composed explicit-or CCL keyword query");
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
@ -465,7 +449,7 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
$query_type ) = buildQuery([], [ 'salud', 'higiene' ], [], [], [], 0, 'en');
like($query, qr/kw\W.*salud\W.*and.*kw\W.*higiene/, "Built composed implicit-and CCL keyword query");
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
@ -473,7 +457,7 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [ 'su-to:Laboratorios' ], [], 0, 'en');
$query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [ 'su-to:Laboratorios' ], [], 0, 'en');
like($query, qr/kw\W.*salud\W*and\W*su-to\W.*Laboratorios/, "Faceted query generated correctly");
unlike($query_desc, qr/Laboratorios/, "Facets not included in query description");
@ -483,7 +467,7 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-itype:MP', 'mc-itype:MU' ], [], 0, 'en');
$query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-itype:MP', 'mc-itype:MU' ], [], 0, 'en');
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
is($results_hashref->{biblioserver}->{hits}, 2, "getRecords generated mc-faceted search matched right number of records");
@ -491,14 +475,14 @@ if ( $indexing_mode eq 'dom' ) {
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,
$stopwords_removed, $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-loc:GEN', 'branch:FFL' ], [], 0, 'en');
$query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-loc:GEN', 'branch:FFL' ], [], 0, 'en');
($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0);
is($results_hashref->{biblioserver}->{hits}, 2, "getRecords generated multi-faceted search matched right number of records");
( $error, $query, $simple_query, $query_cgi,
$query_desc, $limit, $limit_cgi, $limit_desc,