From 826326064ee0530428944e51eb778a44e29bbcd0 Mon Sep 17 00:00:00 2001 From: Tomas Cohen Arazi Date: Wed, 10 Apr 2013 10:52:54 -0300 Subject: [PATCH] Bug 9819 - 'stopwords'-related code removed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch removes code related to stopwords usage. The following methods are removed: C4::Search->remove_stopwords C4::Context->stopwords C4::Context->_new_stopwords And the buildQuery API was changed (removed the \@removed_stopwords return value). A follow-up is provided for database changes, to make rebasing easier. To test: - Apply this patch - Do some searches in both intranet and opac interfaces - Nothing should break Sponsored-by: Universidad Nacional de Córdoba Signed-off-by: Kyle M Hall Signed-off-by: Chris Cormack Signed-off-by: Kyle M Hall --- C4/Context.pm | 46 ----- C4/Search.pm | 58 +----- INSTALL.fedora7 | 1 - admin/stopwords.pl | 96 --------- catalogue/search.pl | 5 +- cataloguing/addbooks.pl | 4 +- .../data/mysql/de-DE/mandatory/stopwords.sql | 99 --------- .../data/mysql/de-DE/mandatory/stopwords.txt | 1 - .../data/mysql/en/mandatory/stopwords.sql | 99 --------- .../data/mysql/en/mandatory/stopwords.txt | 1 - .../mysql/fr-FR/1-Obligatoire/stopwords.sql | 71 ------- .../mysql/fr-FR/1-Obligatoire/stopwords.txt | 1 - .../data/mysql/it-IT/necessari/stopwords.sql | 194 ------------------ .../data/mysql/it-IT/necessari/stopwords.txt | 1 - .../mysql/nb-NO/1-Obligatorisk/stopwords.sql | 26 --- .../mysql/nb-NO/1-Obligatorisk/stopwords.txt | 1 - .../data/mysql/pl-PL/mandatory/stopwords.sql | 99 --------- .../data/mysql/pl-PL/mandatory/stopwords.txt | 1 - .../data/mysql/ru-RU/mandatory/stopwords.sql | 21 -- .../data/mysql/ru-RU/mandatory/stopwords.txt | 1 - .../data/mysql/uk-UA/mandatory/stopwords.sql | 29 --- .../prog/en/includes/prefs-admin-search.inc | 3 +- .../en/includes/stopwords-admin-search.inc | 28 --- .../prog/en/modules/admin/stopwords.tt | 146 ------------- .../prog/en/modules/catalogue/results.tt | 1 - misc/batchRebuildBiblioTables.pl | 2 +- opac/opac-search.pl | 5 +- t/db_dependent/Search.t | 97 ++++----- test/search.pl | 4 - 29 files changed, 57 insertions(+), 1084 deletions(-) delete mode 100755 admin/stopwords.pl delete mode 100644 installer/data/mysql/de-DE/mandatory/stopwords.sql delete mode 100644 installer/data/mysql/de-DE/mandatory/stopwords.txt delete mode 100644 installer/data/mysql/en/mandatory/stopwords.sql delete mode 100644 installer/data/mysql/en/mandatory/stopwords.txt delete mode 100644 installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql delete mode 100644 installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt delete mode 100644 installer/data/mysql/it-IT/necessari/stopwords.sql delete mode 100644 installer/data/mysql/it-IT/necessari/stopwords.txt delete mode 100644 installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql delete mode 100644 installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt delete mode 100644 installer/data/mysql/pl-PL/mandatory/stopwords.sql delete mode 100644 installer/data/mysql/pl-PL/mandatory/stopwords.txt delete mode 100644 installer/data/mysql/ru-RU/mandatory/stopwords.sql delete mode 100644 installer/data/mysql/ru-RU/mandatory/stopwords.txt delete mode 100644 installer/data/mysql/uk-UA/mandatory/stopwords.sql delete mode 100644 koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc delete mode 100644 koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt diff --git a/C4/Context.pm b/C4/Context.pm index e314f5a01b..341438ae1e 100644 --- a/C4/Context.pm +++ b/C4/Context.pm @@ -128,8 +128,6 @@ C4::Context - Maintain and manipulate the context of a Koha script $Zconn = C4::Context->Zconn; - $stopwordhash = C4::Context->stopwords; - =head1 DESCRIPTION When a Koha script runs, it makes use of a certain number of things: @@ -365,7 +363,6 @@ sub new { return if !defined($self->{"config"}); $self->{"Zconn"} = undef; # Zebra Connections - $self->{"stopwords"} = undef; # stopwords list $self->{"marcfromkohafield"} = undef; # the hash with relations between koha table fields and MARC field/subfield $self->{"userenv"} = undef; # User env $self->{"activeuser"} = undef; # current active user @@ -908,8 +905,6 @@ sub marcfromkohafield } # _new_marcfromkohafield -# Internal helper function (not a method!). This creates a new -# hash with stopwords sub _new_marcfromkohafield { my $dbh = C4::Context->dbh; @@ -923,47 +918,6 @@ sub _new_marcfromkohafield return $marcfromkohafield; } -=head2 stopwords - - $dbh = C4::Context->stopwords; - -Returns a hash with stopwords. - -This hash is cached for future use: if you call -Cstopwords> twice, you will get the same hash without real DB access - -=cut - -#' -sub stopwords -{ - my $retval = {}; - - # If the hash already exists, return it. - return $context->{"stopwords"} if defined($context->{"stopwords"}); - - # No hash. Create one. - $context->{"stopwords"} = &_new_stopwords(); - - return $context->{"stopwords"}; -} - -# _new_stopwords -# Internal helper function (not a method!). This creates a new -# hash with stopwords -sub _new_stopwords -{ - my $dbh = C4::Context->dbh; - my $stopwordlist; - my $sth = $dbh->prepare("select word from stopwords"); - $sth->execute; - while (my $stopword = $sth->fetchrow_array) { - $stopwordlist->{$stopword} = uc($stopword); - } - $stopwordlist->{A} = "A" unless $stopwordlist; - return $stopwordlist; -} - =head2 userenv C4::Context->userenv; diff --git a/C4/Search.pm b/C4/Search.pm index b895993dab..b6cebfed2d 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -911,32 +911,6 @@ sub pazGetRecords { return ( undef, $results_hashref, \@facets_loop ); } -# STOPWORDS -sub _remove_stopwords { - my ( $operand, $index ) = @_; - my @stopwords_removed; - - # phrase and exact-qualified indexes shouldn't have stopwords removed - if ( $index !~ m/,(phr|ext)/ ) { - -# remove stopwords from operand : parse all stopwords & remove them (case insensitive) -# we use IsAlpha unicode definition, to deal correctly with diacritics. -# otherwise, a French word like "leçon" would be split into "le" "çon", "le" -# is a stopword, we'd get "çon" and wouldn't find anything... -# - foreach ( keys %{ C4::Context->stopwords } ) { - next if ( $_ =~ /(and|or|not)/ ); # don't remove operators - if ( my ($matched) = ($operand =~ - /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi)) - { - $operand =~ s/\Q$matched\E/ /gi; - push @stopwords_removed, $_; - } - } - } - return ( $operand, \@stopwords_removed ); -} - # TRUNCATION sub _detect_truncation { my ( $operand, $index ) = @_; @@ -1416,10 +1390,10 @@ sub parseQuery { $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, -$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); +$query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); Build queries and limits in CCL, CGI, Human, -handle truncation, stemming, field weighting, stopwords, fuzziness, etc. +handle truncation, stemming, field weighting, fuzziness, etc. See verbose embedded documentation. @@ -1445,7 +1419,6 @@ sub buildQuery { my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; - my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0; my $query = $operands[0]; my $simple_query = $operands[0]; @@ -1458,8 +1431,6 @@ sub buildQuery { my $limit_cgi; my $limit_desc; - my $stopwords_removed; # flag to determine if stopwords have been removed - my $cclq = 0; my $cclindexes = getIndexes(); if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) { @@ -1503,7 +1474,7 @@ sub buildQuery { # return ( # undef, $query, $simple_query, $query_cgi, # $query, $limit, $limit_cgi, $limit_desc, -# $stopwords_removed, 'ccl' +# 'ccl' # ); # } @@ -1527,11 +1498,10 @@ sub buildQuery { # A flag to determine whether or not to add the index to the query my $indexes_set; -# If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling +# If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling if ( $operands[$i] =~ /\w(:|=)/ || $scan ) { $weight_fields = 0; $stemming = 0; - $remove_stopwords = 0; } else { $operands[$i] =~ s/\?/{?}/g; # need to escape question marks } @@ -1550,7 +1520,7 @@ sub buildQuery { #weight_fields/relevance search causes errors with date ranges #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range) #In the case of YYYY-YYYY, it will return no results - $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0; } # Date of Acquisition @@ -1561,15 +1531,14 @@ sub buildQuery { #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so #irrelevant here #remove_stopwords doesn't function anymore so is irrelevant - $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0; } # ISBN,ISSN,Standard Number, don't need special treatment elsif ( $index eq 'nb' || $index eq 'ns' ) { ( $stemming, $auto_truncation, - $weight_fields, $fuzzy_enabled, - $remove_stopwords - ) = ( 0, 0, 0, 0, 0 ); + $weight_fields, $fuzzy_enabled + ) = ( 0, 0, 0, 0 ); if ( $index eq 'nb' ) { if ( C4::Context->preference("SearchWithISBNVariations") ) { @@ -1594,15 +1563,6 @@ sub buildQuery { my $index_plus = $index . $struct_attr . ':'; my $index_plus_comma = $index . $struct_attr . ','; - # Remove Stopwords - if ($remove_stopwords) { - ( $operand, $stopwords_removed ) = - _remove_stopwords( $operand, $index ); - warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG; - warn "REMOVED STOPWORDS: @$stopwords_removed" - if ( $stopwords_removed && $DEBUG ); - } - if ($auto_truncation){ unless ( $index =~ /,(st-|phr|ext)/ ) { #FIXME only valid with LTR scripts @@ -1789,7 +1749,7 @@ sub buildQuery { return ( undef, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type + $query_type ); } diff --git a/INSTALL.fedora7 b/INSTALL.fedora7 index b2a9fcd79d..097c2ec966 100644 --- a/INSTALL.fedora7 +++ b/INSTALL.fedora7 @@ -1183,7 +1183,6 @@ MySQL> show tables; | sessions | | special_holidays | | statistics | -| stopwords | | subscription | | subscriptionhistory | | subscriptionroutinglist | diff --git a/admin/stopwords.pl b/admin/stopwords.pl deleted file mode 100755 index 6d6aad916c..0000000000 --- a/admin/stopwords.pl +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/perl - -#script to administer the stopwords table -#written 20/02/2002 by paul.poulain@free.fr -# This software is placed under the gnu General Public License, v2 (http://www.gnu.org/licenses/gpl.html) - -# Copyright 2000-2002 Katipo Communications -# -# This file is part of Koha. -# -# Koha is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# Koha is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Koha; if not, see . - -use strict; -use warnings; -use CGI qw ( -utf8 ); -use C4::Context; -use C4::Output; -use C4::Auth; - -sub StringSearch { - my $sth = C4::Context->dbh->prepare(" - SELECT word FROM stopwords WHERE (word LIKE ?) ORDER BY word - "); - $sth->execute((shift || '') . "%"); - return $sth->fetchall_arrayref({}); -} - -my $input = new CGI; -my $searchfield = $input->param('searchfield'); -my $offset = $input->param('offset') || 0; -my $script_name = "/cgi-bin/koha/admin/stopwords.pl"; - -my $pagesize = 20; -my $op = $input->param('op') || ''; - -my ($template, $loggedinuser, $cookie) - = get_template_and_user({template_name => "admin/stopwords.tt", - query => $input, - type => "intranet", - flagsrequired => {parameters => 'parameters_remaining_permissions'}, - authnotrequired => 0, - debug => 1, - }); - -$template->param(script_name => $script_name, - searchfield => $searchfield); - -my $dbh = C4::Context->dbh; -if ($op eq 'add_form') { - $template->param(add_form => 1); -} elsif ($op eq 'add_validate') { - $template->param(add_validate => 1); - my @tab = split / |,/, $input->param('word'); - my $sth=$dbh->prepare("INSERT INTO stopwords (word) VALUES (?)"); - foreach my $insert_value (@tab) { - $sth->execute($insert_value); - } -} elsif ($op eq 'delete_confirm') { - $template->param(delete_confirm => 1); -} elsif ($op eq 'delete_confirmed') { - $template->param(delete_confirmed => 1); - my $sth=$dbh->prepare("delete from stopwords where word=?"); - $sth->execute($searchfield); -} else { # DEFAULT - $template->param(else => 1); - my $results = StringSearch($searchfield); - my $count = scalar(@$results); - my @loop; - # FIXME: limit and offset should get to the SQL query - for (my $i=$offset; $i < ($offset+$pagesize<$count?$offset+$pagesize:$count); $i++){ - push @loop, {word => $results->[$i]{'word'}}; - } - $template->param(loop => \@loop); - if ($offset > 0) { - $template->param(offsetgtzero => 1, - prevpage => $offset-$pagesize); - } - if ($offset+$pagesize < scalar(@$results)) { - $template->param(ltcount => 1, - nextpage => $offset+$pagesize); - } -} - -output_html_with_http_headers $input, $cookie, $template->output; - diff --git a/catalogue/search.pl b/catalogue/search.pl index ef0bc8cbb7..69f6ac443e 100755 --- a/catalogue/search.pl +++ b/catalogue/search.pl @@ -481,10 +481,10 @@ my $hits; my $expanded_facet = $params->{'expand'}; # Define some global variables -my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type); +my ( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type); ## I. BUILD THE QUERY -( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan,$lang); +( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by,$scan,$lang); ## parse the query_cgi string and put it into a form suitable for s my @query_inputs; @@ -611,7 +611,6 @@ for (my $i=0;$i<@servers;$i++) { if ($query_desc || $limit_desc) { $template->param(searchdesc => 1); } - $template->param(stopwords_removed => "@$stopwords_removed") if $stopwords_removed; $template->param(results_per_page => $results_per_page); # must define a value for size if not present in DB # in order to avoid problems generated by the default size value in TT diff --git a/cataloguing/addbooks.pl b/cataloguing/addbooks.pl index 038f2918b7..37ac83a632 100755 --- a/cataloguing/addbooks.pl +++ b/cataloguing/addbooks.pl @@ -77,8 +77,8 @@ if ($query) { if ($QParser) { $builtquery = $query; } else { - my ( $builterror,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type); - ( $builterror,$builtquery,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(undef,\@operands); + my ( $builterror,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type); + ( $builterror,$builtquery,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(undef,\@operands); } # find results diff --git a/installer/data/mysql/de-DE/mandatory/stopwords.sql b/installer/data/mysql/de-DE/mandatory/stopwords.sql deleted file mode 100644 index 1bf4725b68..0000000000 --- a/installer/data/mysql/de-DE/mandatory/stopwords.sql +++ /dev/null @@ -1,99 +0,0 @@ -INSERT INTO stopwords VALUES -('a'), -('about'), -('also'), -('an'), -('and'), -('another'), -('any'), -('are'), -('as'), -('at'), -('back'), -('be'), -('because'), -('been'), -('being'), -('but'), -('by'), -('can'), -('could'), -('did'), -('do'), -('each'), -('end'), -('even'), -('for'), -('from'), -('get'), -('go'), -('had'), -('have'), -('he'), -('her'), -('here'), -('his'), -('how'), -('i'), -('if'), -('in'), -('into'), -('is'), -('it'), -('just'), -('may'), -('me'), -('might'), -('much'), -('must'), -('my'), -('no'), -('not'), -('of'), -('off'), -('on'), -('only'), -('or'), -('other'), -('our'), -('out'), -('should'), -('so'), -('some'), -('still'), -('such'), -('than'), -('that'), -('the'), -('their'), -('them'), -('then'), -('there'), -('these'), -('they'), -('this'), -('those'), -('to'), -('too'), -('try'), -('two'), -('under'), -('up'), -('us'), -('was'), -('we'), -('were'), -('what'), -('when'), -('where'), -('which'), -('while'), -('who'), -('why'), -('will'), -('with'), -('within'), -('without'), -('would'), -('you'), -('your'); diff --git a/installer/data/mysql/de-DE/mandatory/stopwords.txt b/installer/data/mysql/de-DE/mandatory/stopwords.txt deleted file mode 100644 index 34eb26c241..0000000000 --- a/installer/data/mysql/de-DE/mandatory/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Englische Stoppwortliste. Sie können diese nach der Installation ändern. diff --git a/installer/data/mysql/en/mandatory/stopwords.sql b/installer/data/mysql/en/mandatory/stopwords.sql deleted file mode 100644 index 1bf4725b68..0000000000 --- a/installer/data/mysql/en/mandatory/stopwords.sql +++ /dev/null @@ -1,99 +0,0 @@ -INSERT INTO stopwords VALUES -('a'), -('about'), -('also'), -('an'), -('and'), -('another'), -('any'), -('are'), -('as'), -('at'), -('back'), -('be'), -('because'), -('been'), -('being'), -('but'), -('by'), -('can'), -('could'), -('did'), -('do'), -('each'), -('end'), -('even'), -('for'), -('from'), -('get'), -('go'), -('had'), -('have'), -('he'), -('her'), -('here'), -('his'), -('how'), -('i'), -('if'), -('in'), -('into'), -('is'), -('it'), -('just'), -('may'), -('me'), -('might'), -('much'), -('must'), -('my'), -('no'), -('not'), -('of'), -('off'), -('on'), -('only'), -('or'), -('other'), -('our'), -('out'), -('should'), -('so'), -('some'), -('still'), -('such'), -('than'), -('that'), -('the'), -('their'), -('them'), -('then'), -('there'), -('these'), -('they'), -('this'), -('those'), -('to'), -('too'), -('try'), -('two'), -('under'), -('up'), -('us'), -('was'), -('we'), -('were'), -('what'), -('when'), -('where'), -('which'), -('while'), -('who'), -('why'), -('will'), -('with'), -('within'), -('without'), -('would'), -('you'), -('your'); diff --git a/installer/data/mysql/en/mandatory/stopwords.txt b/installer/data/mysql/en/mandatory/stopwords.txt deleted file mode 100644 index e4dbf0f2c8..0000000000 --- a/installer/data/mysql/en/mandatory/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -English stop words. You can change this after installation. diff --git a/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql b/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql deleted file mode 100644 index 08a246db66..0000000000 --- a/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.sql +++ /dev/null @@ -1,71 +0,0 @@ -# phpMyAdmin MySQL-Dump -# version 2.2.6-rc1 -# http://phpwizard.net/phpMyAdmin/ -# http://phpmyadmin.sourceforge.net/ (download page) -# -# Host: localhost -# Generation Time: Nov 22, 2002 at 11:10 AM -# Server version: 3.23.52 -# PHP Version: 4.2.3 -# Database : `koha_fr` - -# -# Dumping data for table `stopwords` -# - -INSERT INTO stopwords VALUES ('AU'); -INSERT INTO stopwords VALUES ('ÇA'); -INSERT INTO stopwords VALUES ('CAR'); -INSERT INTO stopwords VALUES ('CE'); -INSERT INTO stopwords VALUES ('CELA'); -INSERT INTO stopwords VALUES ('CES'); -INSERT INTO stopwords VALUES ('CEUX'); -INSERT INTO stopwords VALUES ('CI'); -INSERT INTO stopwords VALUES ('DANS'); -INSERT INTO stopwords VALUES ('DE'); -INSERT INTO stopwords VALUES ('DES'); -INSERT INTO stopwords VALUES ('DU'); -INSERT INTO stopwords VALUES ('ELLE'); -INSERT INTO stopwords VALUES ('ELLES'); -INSERT INTO stopwords VALUES ('EN'); -INSERT INTO stopwords VALUES ('EST'); -INSERT INTO stopwords VALUES ('ET'); -INSERT INTO stopwords VALUES ('EU'); -INSERT INTO stopwords VALUES ('IL'); -INSERT INTO stopwords VALUES ('ILS'); -INSERT INTO stopwords VALUES ('JE'); -INSERT INTO stopwords VALUES ('LA'); -INSERT INTO stopwords VALUES ('LE'); -INSERT INTO stopwords VALUES ('LES'); -INSERT INTO stopwords VALUES ('LEUR'); -INSERT INTO stopwords VALUES ('MA'); -INSERT INTO stopwords VALUES ('MAIS'); -INSERT INTO stopwords VALUES ('MES'); -INSERT INTO stopwords VALUES ('MON'); -INSERT INTO stopwords VALUES ('NI'); -INSERT INTO stopwords VALUES ('NOTRE'); -INSERT INTO stopwords VALUES ('NOUS'); -INSERT INTO stopwords VALUES ('OU'); -INSERT INTO stopwords VALUES ('PAR'); -INSERT INTO stopwords VALUES ('PAS'); -INSERT INTO stopwords VALUES ('PEU'); -INSERT INTO stopwords VALUES ('PEUT'); -INSERT INTO stopwords VALUES ('POUR'); -INSERT INTO stopwords VALUES ('QUE'); -INSERT INTO stopwords VALUES ('QUI'); -INSERT INTO stopwords VALUES ('SA'); -INSERT INTO stopwords VALUES ('SES'); -INSERT INTO stopwords VALUES ('SI'); -INSERT INTO stopwords VALUES ('SIEN'); -INSERT INTO stopwords VALUES ('SON'); -INSERT INTO stopwords VALUES ('SOUS'); -INSERT INTO stopwords VALUES ('SUR'); -INSERT INTO stopwords VALUES ('TA'); -INSERT INTO stopwords VALUES ('TELS'); -INSERT INTO stopwords VALUES ('TES'); -INSERT INTO stopwords VALUES ('TON'); -INSERT INTO stopwords VALUES ('TU'); -INSERT INTO stopwords VALUES ('VOTRE'); -INSERT INTO stopwords VALUES ('VOUS'); -INSERT INTO stopwords VALUES ('VU'); - diff --git a/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt b/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt deleted file mode 100644 index 62be0351c8..0000000000 --- a/installer/data/mysql/fr-FR/1-Obligatoire/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Mots vides de la langue française. \ No newline at end of file diff --git a/installer/data/mysql/it-IT/necessari/stopwords.sql b/installer/data/mysql/it-IT/necessari/stopwords.sql deleted file mode 100644 index 8f348a2e46..0000000000 --- a/installer/data/mysql/it-IT/necessari/stopwords.sql +++ /dev/null @@ -1,194 +0,0 @@ -SET FOREIGN_KEY_CHECKS=0; - -INSERT INTO `stopwords` (`word`) VALUES -('a'), -('about'), -('ad'), -('after'), -('ai'), -('al'), -('all'), -('alla'), -('alle'), -('allo'), -('also'), -('an'), -('and'), -('another'), -('any'), -('are'), -('as'), -('at'), -('b'), -('back'), -('be'), -('because'), -('been'), -('being'), -('but'), -('by'), -('c'), -('can'), -('ci'), -('col'), -('con'), -('could'), -('d'), -('da'), -('dagli'), -('dai'), -('dal'), -('dall'), -('dalla'), -('dalle'), -('dallo'), -('de'), -('degli'), -('dei'), -('del'), -('dell'), -('della'), -('delle'), -('dello'), -('di'), -('did'), -('do'), -('e'), -('each'), -('ed'), -('end'), -('et'), -('even'), -('f'), -('for'), -('fra'), -('from'), -('g'), -('get'), -('gli'), -('go'), -('h'), -('had'), -('have'), -('he'), -('her'), -('here'), -('his'), -('how'), -('however'), -('i'), -('if'), -('il'), -('in'), -('into'), -('is'), -('it'), -('j'), -('just'), -('k'), -('l'), -('la'), -('le'), -('lo'), -('m'), -('may'), -('me'), -('mi'), -('might'), -('more'), -('much'), -('must'), -('my'), -('n'), -('ne'), -('negli'), -('nel'), -('nell'), -('nella'), -('nello'), -('no'), -('non'), -('not'), -('o'), -('of'), -('off'), -('on'), -('only'), -('oppure'), -('or'), -('other'), -('our'), -('out'), -('over'), -('p'), -('per'), -('q'), -('r'), -('s'), -('saw'), -('si'), -('since'), -('should'), -('so'), -('some'), -('still'), -('su'), -('such'), -('sugli'), -('sui'), -('sul'), -('sull'), -('sulla'), -('sulle'), -('t'), -('te'), -('than'), -('that'), -('the'), -('their'), -('them'), -('then'), -('there'), -('these'), -('they'), -('this'), -('those'), -('ti'), -('to'), -('too'), -('tra'), -('try'), -('two'), -('u'), -('un'), -('una'), -('under'), -('uno'), -('up'), -('upon'), -('us'), -('v'), -('vi'), -('was'), -('we'), -('were'), -('what'), -('when'), -('where'), -('whether'), -('which'), -('while'), -('who'), -('why'), -('will'), -('with'), -('within'), -('without'), -('would'), -('x'), -('y'), -('you'), -('your'), -('z'); - -SET FOREIGN_KEY_CHECKS=1; \ No newline at end of file diff --git a/installer/data/mysql/it-IT/necessari/stopwords.txt b/installer/data/mysql/it-IT/necessari/stopwords.txt deleted file mode 100644 index 3f5fe86288..0000000000 --- a/installer/data/mysql/it-IT/necessari/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Stopword. Possono essere modificate dopo l'installazione. diff --git a/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql b/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql deleted file mode 100644 index 0d4f9db0ca..0000000000 --- a/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.sql +++ /dev/null @@ -1,26 +0,0 @@ --- --- Default classification sources and filing rules --- for Koha. --- --- Copyright (C) 2011 Magnus Enger Libriotech --- --- This file is part of Koha. --- --- Koha is free software; you can redistribute it and/or modify it under the --- terms of the GNU General Public License as published by the Free Software --- Foundation; either version 2 of the License, or (at your option) any later --- version. --- --- Koha is distributed in the hope that it will be useful, but WITHOUT ANY --- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR --- A PARTICULAR PURPOSE. See the GNU General Public License for more details. --- --- You should have received a copy of the GNU General Public License along --- with Koha; if not, write to the Free Software Foundation, Inc., --- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -INSERT INTO stopwords VALUES -('eller'), -('en'), -('og'), -('som'); diff --git a/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt b/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt deleted file mode 100644 index c119883249..0000000000 --- a/installer/data/mysql/nb-NO/1-Obligatorisk/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Norske stoppord. Du kan endre disse etter at installasjonen er fullført. (NB! Vil ikke bli benyttet dersom du velger Zebra for indeksering.) diff --git a/installer/data/mysql/pl-PL/mandatory/stopwords.sql b/installer/data/mysql/pl-PL/mandatory/stopwords.sql deleted file mode 100644 index 1bf4725b68..0000000000 --- a/installer/data/mysql/pl-PL/mandatory/stopwords.sql +++ /dev/null @@ -1,99 +0,0 @@ -INSERT INTO stopwords VALUES -('a'), -('about'), -('also'), -('an'), -('and'), -('another'), -('any'), -('are'), -('as'), -('at'), -('back'), -('be'), -('because'), -('been'), -('being'), -('but'), -('by'), -('can'), -('could'), -('did'), -('do'), -('each'), -('end'), -('even'), -('for'), -('from'), -('get'), -('go'), -('had'), -('have'), -('he'), -('her'), -('here'), -('his'), -('how'), -('i'), -('if'), -('in'), -('into'), -('is'), -('it'), -('just'), -('may'), -('me'), -('might'), -('much'), -('must'), -('my'), -('no'), -('not'), -('of'), -('off'), -('on'), -('only'), -('or'), -('other'), -('our'), -('out'), -('should'), -('so'), -('some'), -('still'), -('such'), -('than'), -('that'), -('the'), -('their'), -('them'), -('then'), -('there'), -('these'), -('they'), -('this'), -('those'), -('to'), -('too'), -('try'), -('two'), -('under'), -('up'), -('us'), -('was'), -('we'), -('were'), -('what'), -('when'), -('where'), -('which'), -('while'), -('who'), -('why'), -('will'), -('with'), -('within'), -('without'), -('would'), -('you'), -('your'); diff --git a/installer/data/mysql/pl-PL/mandatory/stopwords.txt b/installer/data/mysql/pl-PL/mandatory/stopwords.txt deleted file mode 100644 index b4e17d8039..0000000000 --- a/installer/data/mysql/pl-PL/mandatory/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Angielskie stop words. Możesz je zmienić po intalacji. diff --git a/installer/data/mysql/ru-RU/mandatory/stopwords.sql b/installer/data/mysql/ru-RU/mandatory/stopwords.sql deleted file mode 100644 index 3610a84a6e..0000000000 --- a/installer/data/mysql/ru-RU/mandatory/stopwords.sql +++ /dev/null @@ -1,21 +0,0 @@ -TRUNCATE stopwords; - -INSERT INTO stopwords VALUES -( 'к'), -( 'и'), -( 'в'), -( 'на'), -( 'да'), -( 'то'), -( 'где'), -( 'еле'), -( 'это'), -( 'что'), -( 'ведь'), -( 'даже'), -( 'почти'), -( 'такой'), -( 'также'), -( 'значит'), -( 'немного'), -( 'который'); \ No newline at end of file diff --git a/installer/data/mysql/ru-RU/mandatory/stopwords.txt b/installer/data/mysql/ru-RU/mandatory/stopwords.txt deleted file mode 100644 index f66cef3cd5..0000000000 --- a/installer/data/mysql/ru-RU/mandatory/stopwords.txt +++ /dev/null @@ -1 +0,0 @@ -Несущественные для поиска русские слова. Вы можете корректировать их после установки. diff --git a/installer/data/mysql/uk-UA/mandatory/stopwords.sql b/installer/data/mysql/uk-UA/mandatory/stopwords.sql deleted file mode 100644 index 8738fea2e6..0000000000 --- a/installer/data/mysql/uk-UA/mandatory/stopwords.sql +++ /dev/null @@ -1,29 +0,0 @@ -TRUNCATE stopwords; - -INSERT INTO stopwords VALUES -('адже'), -('авжеж'), -('в'), -('де'), -('дещо'), -('до'), -('й'), -('ледве'), -('майже'), -('на'), -('навіть'), -('отже'), -('отож'), -('під'), -('так'), -('такий'), -('також'), -('те'), -('тобто'), -('тож'), -('тощо'), -('у'), -('це'), -('що'), -('як'), -('який'); diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc index a224c896de..34de2d4b1c 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc +++ b/koha-tmpl/intranet-tmpl/prog/en/includes/prefs-admin-search.inc @@ -1,5 +1,5 @@
-

[% LibraryName %]

+

[% LibraryName %]

- diff --git a/koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc b/koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc deleted file mode 100644 index 964f1b683e..0000000000 --- a/koha-tmpl/intranet-tmpl/prog/en/includes/stopwords-admin-search.inc +++ /dev/null @@ -1,28 +0,0 @@ -
-

[% LibraryName %]

- -
- diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt deleted file mode 100644 index d02565a40f..0000000000 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/stopwords.tt +++ /dev/null @@ -1,146 +0,0 @@ -[% INCLUDE 'doc-head-open.inc' %] -Koha › System administration › Stop words -[% IF ( add_form ) %]› [% IF ( searchfield ) %]Modify[% ELSE %]New[% END %] stop word -[% ELSIF ( add_validate ) %]› Data recorded -[% ELSIF ( delete_confirm ) %]› Delete stop word '[% searchfield %]' ? -[% ELSIF ( delete_confirmed ) %]› Data deleted -[% END %] - -[% INCLUDE 'doc-head-close.inc' %] - - - -[% INCLUDE 'header.inc' %] -[% INCLUDE 'stopwords-admin-search.inc' %] - - - -
- -
-
-
- -[% IF ( add_form ) %] - [% IF ( searchfield ) %] -

Modify word

- [% ELSE %] -

New word

- [% END %] -
- -
-
  1. - [% IF ( searchfield ) %] - Word - [% searchfield %] - [% ELSE %] - - - [% END %] -
  2. -
-
-
- - Cancel -
-
-[% END %] - -[% IF ( add_validate ) %] -

Data recorded

-
- -
-[% END %] - -[% IF ( delete_confirm ) %] -
-

Delete stop word '[% searchfield %]'

-
- - - -
-
- -
-[% END %] - -[% IF ( delete_confirmed ) %] -

Data deleted

-
- -
-[% END %] - -[% IF ( else ) %] - - - -

Stop words

-

NOTE : if you change something in this table, ask your administrator to run misc/batchRebuildBiblioTables.pl script.

- - [% IF ( searchfield ) %] -

You searched for [% searchfield %]

- [% END %] - - - - - - [% FOREACH loo IN loop %] - - - - - [% END %] -
Word
[% loo.word %]Delete
- -
- [% IF ( offsetgtzero ) %] - << Previous - [% END %] - [% IF ( ltcount ) %] - Next >> - [% END %] -
-[% END %] - -
-
-
-[% INCLUDE 'admin-menu.inc' %] -
-
-[% INCLUDE 'intranet-bottom.inc' %] diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt b/koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt index e011d1b0aa..b337c8c81b 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/catalogue/results.tt @@ -368,7 +368,6 @@ var holdForPatron = function () { [% IF ( CAN_user_editcatalogue_edit_catalogue ) %] [% END %]
- [% IF ( stopwords_removed ) %]

Ignored the following common words: "[% stopwords_removed %]"

[% END %] [% ELSE %]
diff --git a/misc/batchRebuildBiblioTables.pl b/misc/batchRebuildBiblioTables.pl index ae4f54eab2..59185fd926 100755 --- a/misc/batchRebuildBiblioTables.pl +++ b/misc/batchRebuildBiblioTables.pl @@ -53,7 +53,7 @@ $starttime = gettimeofday; #1st of all, find item MARC tag. my ($tagfield,$tagsubfield) = &GetMarcFromKohaField("items.itemnumber",''); -# $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write, stopwords write"); +# $dbh->do("lock tables biblio write, biblioitems write, items write, marc_biblio write, marc_subfield_table write, marc_blob_subfield write, marc_word write, marc_subfield_structure write"); my $sth = $dbh->prepare("SELECT biblionumber FROM biblio"); $sth->execute; # my ($biblionumbermax) = $sth->fetchrow; diff --git a/opac/opac-search.pl b/opac/opac-search.pl index efbbbb2fdd..bc703b3a0a 100755 --- a/opac/opac-search.pl +++ b/opac/opac-search.pl @@ -517,12 +517,12 @@ my $hits; my $expanded_facet = $params->{'expand'}; # Define some global variables -my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type); +my ($error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type); my @results; ## I. BUILD THE QUERY -( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$stopwords_removed,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang); +( $error,$query,$simple_query,$query_cgi,$query_desc,$limit,$limit_cgi,$limit_desc,$query_type) = buildQuery(\@operators,\@operands,\@indexes,\@limits,\@sort_by, 0, $lang); sub _input_cgi_parse { my @elements; @@ -777,7 +777,6 @@ for (my $i=0;$i<@servers;$i++) { if ($query_desc || $limit_desc) { $template->param(searchdesc => 1); } - $template->param(stopwords_removed => "@$stopwords_removed") if $stopwords_removed; $template->param(results_per_page => $results_per_page); my $hide = C4::Context->preference('OpacHiddenItems'); $hide = ($hide =~ m/\S/) if $hide; # Just in case it has some spaces/new lines diff --git a/t/db_dependent/Search.t b/t/db_dependent/Search.t index cac37e88ab..cde59bd820 100644 --- a/t/db_dependent/Search.t +++ b/t/db_dependent/Search.t @@ -92,7 +92,6 @@ our $QueryStemming = 0; our $QueryAutoTruncate = 0; our $QueryWeightFields = 0; our $QueryFuzzy = 0; -our $QueryRemoveStopwords = 0; our $UseQueryParser = 0; our $marcflavour = 'MARC21'; our $contextmodule = new Test::MockModule('C4::Context'); @@ -108,8 +107,6 @@ $contextmodule->mock('preference', sub { return $QueryWeightFields; } elsif ($pref eq 'QueryFuzzy') { return $QueryFuzzy; - } elsif ($pref eq 'QueryRemoveStopwords') { - return $QueryRemoveStopwords; } elsif ($pref eq 'UseQueryParser') { return $UseQueryParser; } elsif ($pref eq 'maxRecordsForFacets') { @@ -212,22 +209,9 @@ sub run_marc21_search_tests { $QueryAutoTruncate = 0; $QueryWeightFields = 0; $QueryFuzzy = 0; - $QueryRemoveStopwords = 0; $UseQueryParser = 0; $marcflavour = 'MARC21'; - foreach my $string ("Leçon","modèles") { - my @results=C4::Search::_remove_stopwords($string,"kw"); - $debug && warn "$string ",Dump(@results); - ok($results[0] eq $string,"$string is not modified"); - } - - foreach my $string ("A book about the stars") { - my @results=C4::Search::_remove_stopwords($string,"kw"); - $debug && warn "$string ",Dump(@results); - ok($results[0] ne $string,"$results[0] from $string"); - } - my $indexes = C4::Search::getIndexes(); is(scalar(grep(/^ti$/, @$indexes)), 1, "Title index supported"); @@ -434,10 +418,10 @@ if ( $indexing_mode eq 'dom' ) { my ( $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ); + $query_type ); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'salud' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'salud' ], [], [], [], 0, 'en'); like($query, qr/kw\W.*salud/, "Built CCL keyword query"); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); @@ -449,7 +433,7 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([ 'and' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([ 'and' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); like($query, qr/kw\W.*salud\W.*and.*kw\W.*higiene/, "Built composed explicit-and CCL keyword query"); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); @@ -457,7 +441,7 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([ 'or' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([ 'or' ], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); like($query, qr/kw\W.*salud\W.*or.*kw\W.*higiene/, "Built composed explicit-or CCL keyword query"); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); @@ -465,7 +449,7 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'salud', 'higiene' ], [], [], [], 0, 'en'); like($query, qr/kw\W.*salud\W.*and.*kw\W.*higiene/, "Built composed implicit-and CCL keyword query"); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); @@ -473,7 +457,7 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [ 'su-to:Laboratorios' ], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [ 'su-to:Laboratorios' ], [], 0, 'en'); like($query, qr/kw\W.*salud\W*and\W*su-to\W.*Laboratorios/, "Faceted query generated correctly"); unlike($query_desc, qr/Laboratorios/, "Facets not included in query description"); @@ -483,7 +467,7 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-itype:MP', 'mc-itype:MU' ], [], 0, 'en'); + $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-itype:MP', 'mc-itype:MU' ], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 2, "getRecords generated mc-faceted search matched right number of records"); @@ -491,14 +475,14 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-loc:GEN', 'branch:FFL' ], [], 0, 'en'); + $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'mc-loc:GEN', 'branch:FFL' ], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 2, "getRecords generated multi-faceted search matched right number of records"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'NEKLS' ], [ 'Code-institution' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'NEKLS' ], [ 'Code-institution' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 12, 'search using index whose name contains "ns" returns expected results (bug 10271)'); @@ -506,12 +490,12 @@ if ( $indexing_mode eq 'dom' ) { $UseQueryParser = 1; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'book' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'book' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 101, "Search for 'book' with index set to 'kw' returns 101 hits"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([ 'and' ], [ 'book', 'another' ], [ 'kw', 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([ 'and' ], [ 'book', 'another' ], [ 'kw', 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 1, "Search for 'kw:book && kw:another' returns 1 hit"); $UseQueryParser = 0; @@ -520,7 +504,7 @@ if ( $indexing_mode eq 'dom' ) { # are just checking that it behaves consistently ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'available' ], [], 0, 'en'); + $query_type ) = buildQuery([], [ '' ], [ 'kw' ], [ 'available' ], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 26, "getRecords generated availability-limited search matched right number of records"); @@ -536,37 +520,37 @@ if ( $indexing_mode eq 'dom' ) { ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'pqf=@attr 1=_ALLRECORDS @attr 2=103 ""' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'pqf=@attr 1=_ALLRECORDS @attr 2=103 ""' ], [], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 180, "getRecords on _ALLRECORDS PQF returned all records"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'pqf=@attr 1=1016 "Lessig"' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'pqf=@attr 1=1016 "Lessig"' ], [], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 4, "getRecords PQF author search for Lessig returned proper number of matches"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'ccl=au:Lessig' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'ccl=au:Lessig' ], [], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 4, "getRecords CCL author search for Lessig returned proper number of matches"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'cql=dc.author any lessig' ], [], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'cql=dc.author any lessig' ], [], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 4, "getRecords CQL author search for Lessig returned proper number of matches"); - $QueryStemming = $QueryAutoTruncate = $QueryFuzzy = $QueryRemoveStopwords = 0; + $QueryStemming = $QueryAutoTruncate = $QueryFuzzy = 0; $QueryWeightFields = 1; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'salud' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 19, "Weighted query returned correct number of results"); @@ -577,64 +561,64 @@ if ( $indexing_mode eq 'dom' ) { is(MARC::Record::new_from_xml($results_hashref->{biblioserver}->{RECORDS}->[0],'UTF-8')->title_proper(), 'Salud y seguridad de los trabajadores del sector salud: manual para gerentes y administradores^ies', "Weighted query returns best match first"); } - $QueryStemming = $QueryWeightFields = $QueryFuzzy = $QueryRemoveStopwords = 0; + $QueryStemming = $QueryWeightFields = $QueryFuzzy = 0; $QueryAutoTruncate = 1; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'medic' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'medic' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 5, "Search for 'medic' returns matches with automatic truncation on"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'medic*' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'medic*' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 5, "Search for 'medic*' returns matches with automatic truncation on"); - $QueryStemming = $QueryFuzzy = $QueryRemoveStopwords = $QueryAutoTruncate = 0; + $QueryStemming = $QueryFuzzy = $QueryAutoTruncate = 0; $QueryWeightFields = 1; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'web application' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'web application' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 1, "Search for 'web application' returns one hit with QueryWeightFields on"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'web "application' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'web "application' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 1, "Search for 'web \"application' returns one hit with QueryWeightFields on (bug 7518)"); - $QueryStemming = $QueryWeightFields = $QueryFuzzy = $QueryRemoveStopwords = $QueryAutoTruncate = 0; + $QueryStemming = $QueryWeightFields = $QueryFuzzy = $QueryAutoTruncate = 0; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'medic' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'medic' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, undef, "Search for 'medic' returns no matches with automatic truncation off"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'medic*' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'medic*' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 5, "Search for 'medic*' returns matches with automatic truncation off"); $QueryStemming = $QueryWeightFields = 1; - $QueryFuzzy = $QueryRemoveStopwords = $QueryAutoTruncate = 0; + $QueryFuzzy = $QueryAutoTruncate = 0; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'pressed' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'pressed' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, 7, "Search for 'pressed' returns matches when stemming (and query weighting) is on"); - $QueryStemming = $QueryWeightFields = $QueryFuzzy = $QueryRemoveStopwords = $QueryAutoTruncate = 0; + $QueryStemming = $QueryWeightFields = $QueryFuzzy = $QueryAutoTruncate = 0; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'pressed' ], [ 'kw' ], [], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'pressed' ], [ 'kw' ], [], [], 0, 'en'); ($error, $results_hashref, $facets_loop) = getRecords($query,$simple_query,[ ], [ 'biblioserver' ],20,0,undef,\%branches,\%itemtypes,$query_type,0); is($results_hashref->{biblioserver}->{hits}, undef, "Search for 'pressed' returns no matches when stemming is off"); @@ -718,49 +702,49 @@ if ( $indexing_mode eq 'dom' ) { $term = 'Arizona'; ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-br' ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ $term ], [ 'su-br' ], [ ], [], 0, 'en'); matchesExplodedTerms("Advanced search for broader subjects", $query, 'Arizona', 'United States'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-na' ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ $term ], [ 'su-na' ], [ ], [], 0, 'en'); matchesExplodedTerms("Advanced search for narrower subjects", $query, 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ $term ], [ 'su-rl' ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ $term ], [ 'su-rl' ], [ ], [], 0, 'en'); matchesExplodedTerms("Advanced search for related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ "$term", 'history' ], [ 'su-rl', 'kw' ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ "$term", 'history' ], [ 'su-rl', 'kw' ], [ ], [], 0, 'en'); matchesExplodedTerms("Advanced search for related subjects and keyword 'history' searches related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County'); like($query, qr/history/, "Advanced search for related subjects and keyword 'history' searches for 'history'"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ 'history', "$term" ], [ 'kw', 'su-rl' ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ 'history', "$term" ], [ 'kw', 'su-rl' ], [ ], [], 0, 'en'); matchesExplodedTerms("Order of terms doesn't matter for advanced search", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County'); like($query, qr/history/, "Order of terms doesn't matter for advanced search"); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ "su-br($term)" ], [ ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ "su-br($term)" ], [ ], [ ], [], 0, 'en'); matchesExplodedTerms("Simple search for broader subjects", $query, 'Arizona', 'United States'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ "su-na($term)" ], [ ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ "su-na($term)" ], [ ], [ ], [], 0, 'en'); matchesExplodedTerms("Simple search for narrower subjects", $query, 'Arizona', 'Maricopa County', 'Navajo County', 'Pima County'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ "su-rl($term)" ], [ ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ "su-rl($term)" ], [ ], [ ], [], 0, 'en'); matchesExplodedTerms("Simple search for related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County'); ( $error, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery([], [ "history && su-rl($term)" ], [ ], [ ], [], 0, 'en'); + $query_type ) = buildQuery([], [ "history && su-rl($term)" ], [ ], [ ], [], 0, 'en'); matchesExplodedTerms("Simple search for related subjects and keyword 'history' searches related subjects", $query, 'Arizona', 'United States', 'Maricopa County', 'Navajo County', 'Pima County'); like($query, qr/history/, "Simple search for related subjects and keyword 'history' searches for 'history'"); @@ -907,7 +891,6 @@ sub run_unimarc_search_tests { $QueryAutoTruncate = 0; $QueryWeightFields = 0; $QueryFuzzy = 0; - $QueryRemoveStopwords = 0; $UseQueryParser = 0; $marcflavour = 'UNIMARC'; diff --git a/test/search.pl b/test/search.pl index 4362791b74..5d4f6e8d76 100755 --- a/test/search.pl +++ b/test/search.pl @@ -37,7 +37,6 @@ foreach ( @SEARCH ) { $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type ) = buildQuery( $_->{operators}, $_->{operands}, $_->{indexes}, $_->{limits}, $_->{sort_by}, 0, $_->{lang} ); @@ -64,9 +63,6 @@ foreach ( @SEARCH ) { $expected = $_->{limit_desc}; push @mismatch, "Limit desc: $limit_desc (not: $expected)" unless $limit_desc eq $expected; - $expected = $_->{stopwords_removed}; - push @mismatch, "Stopwords removed: $stopwords_removed (not: $expected)" unless $stopwords_removed eq $expected; - $expected = $_->{query_type}; push @mismatch, "Query Type: $query_type (not: $expected)" unless $query_type eq $expected; -- 2.39.5