From cfea1725448518f12056bff63ec5624ab82d5afb Mon Sep 17 00:00:00 2001
From: Galen Charlton <galen.charlton@liblime.com>
Date: Mon, 31 Mar 2008 11:57:18 -0500
Subject: [PATCH] work around issue in MARC::Charset

Because of a bug in MARC::Charset 0.98, if a string to convert from
MARC-8 to UTF-8 has (a) one or more diacritics that (b) are only in character positions
128 to 255 inclusive, the resulting converted string is not in
UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1).  As a result,
when such a record is converted to XML using ->as_xml_record(), the resulting
XML can be truncated at the offending character.  An example of such a record
is one that has a price in British pounds in the 260$c but no other diacritics.

Signed-off-by: Joshua Ferraro <jmf@liblime.com>
---
 C4/Charset.pm | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/C4/Charset.pm b/C4/Charset.pm
index 2e2e83153a..a676b7ccf3 100644
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -419,6 +419,16 @@ sub _marc_marc8_to_utf8 {
             my @converted_subfields;
             foreach my $subfield ($field->subfields()) {
                 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
+                unless (IsStringUTF8ish($utf8sf)) {
+                    # Because of a bug in MARC::Charset 0.98, if the string
+                    # has (a) one or more diacritics that (b) are only in character positions
+                    # 128 to 255 inclusive, the resulting converted string is not in
+                    # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1).  If that
+                    # occurs, upgrade the string in place.  Moral of the story seems to be
+                    # that pack("U", ...) is better than chr(...) if you need to guarantee
+                    # that the resulting string is UTF-8.
+                    utf8::upgrade($utf8sf);
+                }
                 push @converted_subfields, $subfield->[0], $utf8sf;
             }
 
-- 
2.39.5