From b36ddc5feb5a4dea87c3a267c1a4aa9e5e1be3df Mon Sep 17 00:00:00 2001
From: Joshua Ferraro
Date: Sat, 6 Oct 2007 00:23:43 -0500
Subject: [PATCH] adding better english stopwords

Signed-off-by: Chris Cormack
Signed-off-by: Joshua Ferraro
---
Reviewer notes (free-form area after the "---" separator; not part of the
commit message or of either file's content):

  * stopwords.sql is replaced by one multi-row INSERT that seeds the
    `stopwords` table with 98 lowercase English words.
  * The INSERT has no column list, so each single-value row is matched to
    the table's columns by position. This presumably relies on `stopwords`
    having exactly one column -- TODO confirm against the schema, which is
    not visible in this patch.
  * The previous seed row was upper-case ('THE'); the new rows are
    lower-case. NOTE(review): whether that matters depends on the column
    collation and on how the consumer compares words -- confirm.

 installer/data/en/mandatory/stopwords.sql | 106 ++++++++++++++++++++--
 installer/data/en/mandatory/stopwords.txt |   3 +-
 2 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/installer/data/en/mandatory/stopwords.sql b/installer/data/en/mandatory/stopwords.sql
index 30a9f1c6b6..1bf4725b68 100644
--- a/installer/data/en/mandatory/stopwords.sql
+++ b/installer/data/en/mandatory/stopwords.sql
@@ -1,7 +1,99 @@
-
-#
-# table `stopwords`
-#
-
-INSERT INTO stopwords VALUES ('THE');
-
+INSERT INTO stopwords VALUES
+('a'),
+('about'),
+('also'),
+('an'),
+('and'),
+('another'),
+('any'),
+('are'),
+('as'),
+('at'),
+('back'),
+('be'),
+('because'),
+('been'),
+('being'),
+('but'),
+('by'),
+('can'),
+('could'),
+('did'),
+('do'),
+('each'),
+('end'),
+('even'),
+('for'),
+('from'),
+('get'),
+('go'),
+('had'),
+('have'),
+('he'),
+('her'),
+('here'),
+('his'),
+('how'),
+('i'),
+('if'),
+('in'),
+('into'),
+('is'),
+('it'),
+('just'),
+('may'),
+('me'),
+('might'),
+('much'),
+('must'),
+('my'),
+('no'),
+('not'),
+('of'),
+('off'),
+('on'),
+('only'),
+('or'),
+('other'),
+('our'),
+('out'),
+('should'),
+('so'),
+('some'),
+('still'),
+('such'),
+('than'),
+('that'),
+('the'),
+('their'),
+('them'),
+('then'),
+('there'),
+('these'),
+('they'),
+('this'),
+('those'),
+('to'),
+('too'),
+('try'),
+('two'),
+('under'),
+('up'),
+('us'),
+('was'),
+('we'),
+('were'),
+('what'),
+('when'),
+('where'),
+('which'),
+('while'),
+('who'),
+('why'),
+('will'),
+('with'),
+('within'),
+('without'),
+('would'),
+('you'),
+('your');
diff --git a/installer/data/en/mandatory/stopwords.txt b/installer/data/en/mandatory/stopwords.txt
index 74cdb77c43..e4dbf0f2c8 100644
--- a/installer/data/en/mandatory/stopwords.txt
+++ b/installer/data/en/mandatory/stopwords.txt
@@ -1,2 +1 @@
-English stop words
-(contains only THE, please suggest a more complete file)
+English stop words. You can change this after installation.
-- 
2.39.5