From 37340e371857549e480c3814b2d0f9e10f58ca43 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Fr=C3=A9d=C3=A9rick=20Capovilla?=
 <frederick.capovilla@libeo.com>
Date: Wed, 28 Sep 2011 13:58:14 -0400
Subject: [PATCH] Normalize records imported from Z39.50 servers.

Some Z39.50 servers may use the MARC-8 encoding, which uses separated
diacritics. By forcing a normalization, all imported records will have
combined diacritics.

Records with separated diacritics might not show up in Zebra searches if
the search terms use accented characters.

Signed-off-by: Marcel de Rooy <m.de.rooy@rijksmuseum.nl>

http://bugs.koha-community.org/show_bug.cgi?id=8610
Signed-off-by: Paul Poulain <paul.poulain@biblibre.com>
Checked that it still works after the patch with UNIMARC and the BNF
server (which provides UTF-8 records).
---
 C4/Breeding.pm              | 3 +++
 C4/ImportBatch.pm           | 4 ++++
 acqui/z3950_search.pl       | 4 ++++
 cataloguing/z3950_search.pl | 4 ++++
 4 files changed, 15 insertions(+)

diff --git a/C4/Breeding.pm b/C4/Breeding.pm
index ea3e90215f..9dedc4d08b 100644
--- a/C4/Breeding.pm
+++ b/C4/Breeding.pm
@@ -94,6 +94,9 @@ sub ImportBreeding {
         ($marcrecord, $charset_result, $charset_errors) = 
             MarcToUTF8Record($marcarray[$i]."\x1D", C4::Context->preference("marcflavour"), $encoding);
         
+        # Normalize the record so it doesn't have separated diacritics
+        SetUTF8Flag($marcrecord);
+
 #         warn "$i : $marcarray[$i]";
         # FIXME - currently this does nothing 
         my @warnings = $marcrecord->warnings();
diff --git a/C4/ImportBatch.pm b/C4/ImportBatch.pm
index 648d96e734..15f1acbc4a 100644
--- a/C4/ImportBatch.pm
+++ b/C4/ImportBatch.pm
@@ -382,6 +382,10 @@ sub  BatchStageMarcRecords {
         if (scalar($marc_record->fields()) == 0) {
             push @invalid_records, $marc_blob;
         } else {
+
+            # Normalize the record so it doesn't have separated diacritics
+            SetUTF8Flag($marc_record);
+
             $num_valid++;
             if ($record_type eq 'biblio') {
                 $import_record_id = AddBiblioToBatch($batch_id, $rec_num, $marc_record, $encoding, int(rand(99999)), 0);
diff --git a/acqui/z3950_search.pl b/acqui/z3950_search.pl
index a1cdce4120..8da89b6645 100755
--- a/acqui/z3950_search.pl
+++ b/acqui/z3950_search.pl
@@ -281,6 +281,10 @@ warn "query ".$query  if $DEBUG;
 ## In HEAD i change everything to UTF-8
 # In rel2_2 i am not sure what encoding is so no character conversion is done here
 ##Add necessary encoding changes to here -TG
+
+                        # Normalize the record so it doesn't have separated diacritics
+                        SetUTF8Flag($marcrecord);
+
                         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, "" );
                         $oldbiblio->{isbn}   =~ s/ |-|\.//g if $oldbiblio->{isbn};
                         # pad | and ( with spaces to allow line breaks in the HTML
diff --git a/cataloguing/z3950_search.pl b/cataloguing/z3950_search.pl
index 5e22636b5a..fe07a7d42c 100755
--- a/cataloguing/z3950_search.pl
+++ b/cataloguing/z3950_search.pl
@@ -249,6 +249,10 @@ warn "query ".$query  if $DEBUG;
 ## In HEAD i change everything to UTF-8
 # In rel2_2 i am not sure what encoding is so no character conversion is done here
 ##Add necessary encoding changes to here -TG
+
+                        # Normalize the record so it doesn't have separated diacritics
+                        SetUTF8Flag($marcrecord);
+
                         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, "" );
                         $oldbiblio->{isbn}   =~ s/ |-|\.//g if $oldbiblio->{isbn};
                         # pad | and ( with spaces to allow line breaks in the HTML
-- 
2.39.5