From 1cd11f4d54bf093294418dfe43bc0c55cbe26335 Mon Sep 17 00:00:00 2001
From: Paul POULAIN <paul@koha-fr.org>
Date: Tue, 30 Oct 2007 13:02:52 -0500
Subject: [PATCH] fixes in NoZebra search & indexing

- the quotemeta was wrong (and introduced some bugs in diacritics)
- fix some intermittent bugs: the union was done on entries that still included their weight, which is wrong and resulted in some results being missed (when the entries had different weights)

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>
---
 C4/Search.pm                            | 40 +++++++++++++++++--------
 misc/migration_tools/rebuild_nozebra.pl |  1 -
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/C4/Search.pm b/C4/Search.pm
index 1f7cd982d1..5069f3861b 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -1177,7 +1177,7 @@ sub NZgetRecords {
 sub NZanalyse {
     my ($string,$server) = @_;
     # $server contains biblioserver or authorities, depending on what we search on.
-    #warn "querying : $string on $server";
+    warn "querying : $string on $server";
     $server='biblioserver' unless $server;
     # if we have a ", replace the content to discard temporarily any and/or/not inside
     my $commacontent;
@@ -1247,12 +1247,12 @@ sub NZanalyse {
         my $right = $3;
         my $results;
         # automatic replace for short operators
-        $left='title' if $left eq 'ti';
-        $left='author' if $left eq 'au';
-        $left='publisher' if $left eq 'pb';
-        $left='subject' if $left eq 'su';
-        $left='koha-Auth-Number' if $left eq 'an';
-        $left='keyword' if $left eq 'kw';
+        $left='title' if $left =~ '^ti';
+        $left='author' if $left =~ '^au';
+        $left='publisher' if $left =~ '^pb';
+        $left='subject' if $left =~ '^su';
+        $left='koha-Auth-Number' if $left =~ '^an';
+        $left='keyword' if $left =~ '^kw';
         if ($operator) {
             #do a specific search
             my $dbh = C4::Context->dbh;
@@ -1273,9 +1273,15 @@ sub NZanalyse {
                 if ($results) {
                     my @leftresult = split /;/, $biblionumbers;
                     my $temp;
-                    foreach (@leftresult) {
-                        if ($results =~ "$_;") {
-                            $temp .= "$_;$_;";
+                    foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
+                        # remove weight at the end
+                        my $cleaned = $entry;
+                        $cleaned =~ s/-\d*$//;
+                        # if the entry already in the hash, take it & increase weight
+#                         warn "===== $cleaned =====";
+                        if ($results =~ "$cleaned") {
+                            $temp .= "$entry;$entry;";
+#                             warn "INCLUDING $entry";
                         }
                     }
                     $results = $temp;
@@ -1299,15 +1305,23 @@ sub NZanalyse {
                 }
                 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                 if ($results) {
+#                 warn "RES for $_ = $biblionumbers";
                     my @leftresult = split /;/, $biblionumbers;
                     my $temp;
-                    foreach (@leftresult) {
-                        if ($results =~ "$_;") {
-                            $temp .= "$_;$_;";
+                    foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight
+                        # remove weight at the end
+                        my $cleaned = $entry;
+                        $cleaned =~ s/-\d*$//;
+                        # if the entry already in the hash, take it & increase weight
+#                         warn "===== $cleaned =====";
+                        if ($results =~ "$cleaned") {
+                            $temp .= "$entry;$entry;";
+#                             warn "INCLUDING $entry";
                         }
                     }
                     $results = $temp;
                 } else {
+#                 warn "NEW RES for $_ = $biblionumbers";
                     $results = $biblionumbers;
                 }
             }
diff --git a/misc/migration_tools/rebuild_nozebra.pl b/misc/migration_tools/rebuild_nozebra.pl
index 4ddb2f8146..9e8407be95 100755
--- a/misc/migration_tools/rebuild_nozebra.pl
+++ b/misc/migration_tools/rebuild_nozebra.pl
@@ -100,7 +100,6 @@ while (my ($biblionumber) = $sth->fetchrow) {
 
     # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values
     $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
-    $title = quotemeta $title;
     # limit to 10 char, should be enough, and limit the DB size
     $title = substr($title,0,10);
     #parse each field