From 1cd11f4d54bf093294418dfe43bc0c55cbe26335 Mon Sep 17 00:00:00 2001 From: Paul POULAIN Date: Tue, 30 Oct 2007 13:02:52 -0500 Subject: [PATCH] fixes in NoZebra search & indexing - the quotemeta was wrong (and introduced some bugs in diacritics) - fixing some bugs that appear only sometimes : the union was done including weight, which is wrong & resulted in missing some results (when various weighting) Signed-off-by: Chris Cormack Signed-off-by: Joshua Ferraro --- C4/Search.pm | 40 +++++++++++++++++-------- misc/migration_tools/rebuild_nozebra.pl | 1 - 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/C4/Search.pm b/C4/Search.pm index 1f7cd982d1..5069f3861b 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -1177,7 +1177,7 @@ sub NZgetRecords { sub NZanalyse { my ($string,$server) = @_; # $server contains biblioserver or authorities, depending on what we search on. - #warn "querying : $string on $server"; + warn "querying : $string on $server"; $server='biblioserver' unless $server; # if we have a ", replace the content to discard temporarily any and/or/not inside my $commacontent; @@ -1247,12 +1247,12 @@ sub NZanalyse { my $right = $3; my $results; # automatic replace for short operators - $left='title' if $left eq 'ti'; - $left='author' if $left eq 'au'; - $left='publisher' if $left eq 'pb'; - $left='subject' if $left eq 'su'; - $left='koha-Auth-Number' if $left eq 'an'; - $left='keyword' if $left eq 'kw'; + $left='title' if $left =~ '^ti'; + $left='author' if $left =~ '^au'; + $left='publisher' if $left =~ '^pb'; + $left='subject' if $left =~ '^su'; + $left='koha-Auth-Number' if $left =~ '^an'; + $left='keyword' if $left =~ '^kw'; if ($operator) { #do a specific search my $dbh = C4::Context->dbh; @@ -1273,9 +1273,15 @@ sub NZanalyse { if ($results) { my @leftresult = split /;/, $biblionumbers; my $temp; - foreach (@leftresult) { - if ($results =~ "$_;") { - $temp .= "$_;$_;"; + foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight + # remove weight at the end + my $cleaned = $entry; + $cleaned =~ s/-\d*$//; + # if the entry already in the hash, take it & increase weight +# warn "===== $cleaned ====="; + if ($results =~ "$cleaned") { + $temp .= "$entry;$entry;"; +# warn "INCLUDING $entry"; } } $results = $temp; @@ -1299,15 +1305,23 @@ sub NZanalyse { } # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { +# warn "RES for $_ = $biblionumbers"; my @leftresult = split /;/, $biblionumbers; my $temp; - foreach (@leftresult) { - if ($results =~ "$_;") { - $temp .= "$_;$_;"; + foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight + # remove weight at the end + my $cleaned = $entry; + $cleaned =~ s/-\d*$//; + # if the entry already in the hash, take it & increase weight +# warn "===== $cleaned ====="; + if ($results =~ "$cleaned") { + $temp .= "$entry;$entry;"; +# warn "INCLUDING $entry"; } } $results = $temp; } else { +# warn "NEW RES for $_ = $biblionumbers"; $results = $biblionumbers; } } diff --git a/misc/migration_tools/rebuild_nozebra.pl b/misc/migration_tools/rebuild_nozebra.pl index 4ddb2f8146..9e8407be95 100755 --- a/misc/migration_tools/rebuild_nozebra.pl +++ b/misc/migration_tools/rebuild_nozebra.pl @@ -100,7 +100,6 @@ while (my ($biblionumber) = $sth->fetchrow) { # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g; - $title = quotemeta $title; # limit to 10 char, should be enough, and limit the DB size $title = substr($title,0,10); #parse each field