From 7006c555ac5ac08ea3f81f0867351794212dd0a5 Mon Sep 17 00:00:00 2001 From: Henri-Damien LAURENT Date: Thu, 23 Jul 2009 18:47:41 +0200 Subject: [PATCH] _remove_stopwords in C4::Search had some issues For some reason, it did not do an exact match on stopwords, so it would also prune parts of other words. Signed-off-by: Galen Charlton --- C4/Search.pm | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/C4/Search.pm b/C4/Search.pm index 8bf75ac0aa..f72bb11571 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -632,19 +632,16 @@ sub _remove_stopwords { # we use IsAlpha unicode definition, to deal correctly with diacritics. # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le" # is a stopword, we'd get "çon" and wouldn't find anything... - foreach ( keys %{ C4::Context->stopwords } ) { - next if ( $_ =~ /(and|or|not)/ ); # don't remove operators - if ( $operand =~ - /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ ) - { - $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi; - $operand =~ s/^$_\P{IsAlpha}/ /gi; - $operand =~ s/\P{IsAlpha}$_$/ /gi; - $operand =~ s/$1//gi; - push @stopwords_removed, $_; - } - } - } + foreach ( keys %{ C4::Context->stopwords } ) { + next if ( $_ =~ /(and|or|not)/ ); # don't remove operators + if ( my ($matched) = ($operand =~ + /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) ) + { + $operand =~ s/\Q$matched\E/ /gi; + push @stopwords_removed, $_; + } + } + } return ( $operand, \@stopwords_removed ); } -- 2.39.5