From cf81242caeda06fa925942f109bd0acaf7d0e273 Mon Sep 17 00:00:00 2001 From: Paul POULAIN Date: Tue, 26 Feb 2008 09:30:21 +1300 Subject: [PATCH] NoZebra fixes : removing \r and \n when indexing Signed-off-by: Chris Cormack Signed-off-by: Joshua Ferraro --- C4/Biblio.pm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 86f6310379..b07073e83a 100755 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -2269,7 +2269,7 @@ sub _AddBiblioNoZebra { } # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values - $title =~ s/ |\.|,|;|\[|\]|\(|\)|\*|-|'|:|=//g; + $title =~ s/ |\.|,|;|\[|\]|\(|\)|\*|-|'|:|=|\r|\n//g; # limit to 10 char, should be enough, and limit the DB size $title = substr($title,0,10); #parse each field @@ -2281,6 +2281,7 @@ sub _AddBiblioNoZebra { my $tag = $field->tag(); my $subfieldcode = $subfield->[0]; my $indexed=0; + warn "INDEXING :".$subfield->[1]; # check each index to see if the subfield is stored somewhere # otherwise, store it in __RAW__ index foreach my $key (keys %index) { @@ -2289,7 +2290,7 @@ sub _AddBiblioNoZebra { $indexed=1; my $line= lc $subfield->[1]; # remove meaningless value in the field... - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:|\r|\n/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) @@ -2322,7 +2323,7 @@ sub _AddBiblioNoZebra { # the subfield is not indexed, store it in __RAW__ index anyway unless ($indexed) { my $line= lc $subfield->[1]; - $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g; + $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:|\r|\n/ /g; # ... and split in words foreach (split / /,$line) { next unless $_; # skip empty values (multiple spaces) -- 2.39.2