From d5f5d844c5ed97fb94adabb1c94b93b0ad5dae23 Mon Sep 17 00:00:00 2001
From: Henri-Damien LAURENT <henridamien.laurent@biblibre.com>
Date: Wed, 27 Jan 2010 17:51:08 +0100
Subject: [PATCH] (bug #4020) XSLT unimarc display

When using XSLT display with UNIMARC, as_xml fails on some subfields
containing true UTF-8 data, because the marcflavour is not taken into
account when encoding the record.

Moreover, because transformMARCXML4XSLT edits some values in the MARC record
and the Perl UTF8 flag is not handled by MARC::File::USMARC, the data ends up
being double-encoded.
Sending a patch to fix both issues.

This patch adds
    - two functions in  C4/Charset.pm
        NormalizeString (uses Unicode::Normalize)
        SetUTF8Flag (This function in my opinion belongs to MARC::Record, or at least MARC::File::USMARC)
    - edits C4::XSLT in order to cope with the correct marcflavour
    - edits C4::Search searchResults to use SetUTF8Flag

Conflicts solved:

	C4/Charset.pm
	C4/Search.pm
	C4/XSLT.pm
---
 C4/Charset.pm | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
 C4/Search.pm  |  4 ++-
 C4/XSLT.pm    | 18 +++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/C4/Charset.pm b/C4/Charset.pm
index b65627708d..839a9204de 100644
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -20,6 +20,8 @@ package C4::Charset;
 use strict;
 use MARC::Charset qw/marc8_to_utf8/;
 use Text::Iconv;
+use C4::Debug;
+use Unicode::Normalize;
 
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
 
@@ -31,6 +33,7 @@ BEGIN {
     @EXPORT = qw(
         IsStringUTF8ish
         MarcToUTF8Record
+        SetUTF8Flag
         SetMarcUnicodeFlag
         StripNonXmlChars
     );
@@ -108,6 +111,86 @@ sub IsStringUTF8ish {
     return utf8::decode($str);
 }
 
+=head2 SetUTF8Flag
+
+=over 4
+
+my $marc_record = SetUTF8Flag($marc_record);
+
+=back
+
+This function sets the Perl UTF8 flag on the data of a MARC record.
+It is required after using new_from_usmarc,
+since MARC::File::USMARC does not set the Perl UTF8 flag itself.
+Without it, editing the fields and subfields of a Unicode MARC record
+would end up double-encoding the data.
+
+FIXME
+In my opinion, this function belongs to MARC::Record and not
+to this package.
+But since it handles both charsets and MARC::Record, it found its way into this package.
+
+=cut
+
+# Rebuilds every data field (tag >= 010) of $record from NormalizeString()ed
+# subfield values so that they carry Perl's UTF8 flag; fields with a lower
+# tag are left untouched. Modifies $record in place and returns its argument.
+sub SetUTF8Flag{
+	my ($record)=@_;
+	# Hand the argument back untouched when there is nothing to convert.
+	return $record unless ($record && $record->fields());
+	foreach my $field ($record->fields()){
+		next unless ($field->tag()>=10);    # control fields carry no subfields
+		# subfields() yields [code, value] pairs; flatten them for new().
+		my @subfields = map { ($_->[0], NormalizeString($_->[1])) } $field->subfields();
+		my $newfield=MARC::Field->new(
+			$field->tag(), $field->indicator(1), $field->indicator(2), @subfields
+		);
+		# Swap the rebuilt field in for the one currently being visited.
+		$field->replace_with($newfield);
+	}
+	return $record;
+}
+
+=head2 NormalizeString
+
+=over 4
+
+    my $normalized_string=NormalizeString($string);
+
+=back
+
+Given a string, an optional second argument (true: normalize to NFD instead
+of the default NFC) and an optional third argument (true: replace
+punctuation signs with spaces and drop one-letter words), sets the Perl
+UTF8 flag on the data if need be and applies the cleaning if required.
+Returns the resulting UTF-8 string, normalized to NFC (or NFD on request).
+
+Sample code:
+
+    my $string = NormalizeString("l'ornithoptère", 1);
+    # results in "l'ornithoptère" in NFD form, with the UTF8 flag set
+
+=cut
+
+# Decodes $string to characters when needed and returns it normalized: NFC by
+# default, NFD when $nfd is true. A true $transform also blanks punctuation
+# and drops one-letter words. An undefined $string is returned as is.
+sub NormalizeString{
+	my ($string,$nfd,$transform)=@_;
+	return $string unless defined($string);
+	utf8::decode($string) unless (utf8::is_utf8($string));
+	$string = $nfd ? NFD($string) : NFC($string);
+	if ($transform){
+		# Blank out punctuation so that it cannot glue two words together.
+		$string=~s/[<>^;.?,\-()\[\]{}\$%!*:\\\/&"']/ /g;
+		# Drop one-letter words: "d'" and "l'" have just become "d " and "l ".
+		$string=~s/\b\S\b//g;
+		$string=~s/\s+$//;
+	}
+	return $string;
+}
+
 =head2 MarcToUTF8Record
 
 =over 4
diff --git a/C4/Search.pm b/C4/Search.pm
index 39a111f4a5..3f4ee1563c 100644
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -30,6 +30,7 @@ use C4::Branch;
 use C4::Debug;
 use YAML;
 use URI::Escape;
+use C4::Charset;
 
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
 
@@ -446,6 +447,7 @@ sub getRecords {
                     # not an index scan
                     else {
                         $record = $results[ $i - 1 ]->record($j)->raw();
+            		warn $results[$i-1]->record($j)->render() ;
 
                         # warn "RECORD $j:".$record;
                         $results_hash->{'RECORDS'}[$j] = $record;
@@ -1405,6 +1407,7 @@ sub searchResults {
     # loop through all of the records we've retrieved
     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
+	    SetUTF8Flag($marcrecord);
 		my $biblionumber;
 
         if(not $scan){
@@ -1672,7 +1675,6 @@ sub searchResults {
         }
 
         # XSLT processing of some stuff
-        # FIXME : This needs some work in order to be more flexible : Can not use a result list for intranet different from OPAC
         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
                 $oldbiblio->{biblionumber}, $marcrecord, C4::Context->preference("XSLTResultsDisplay") );
diff --git a/C4/XSLT.pm b/C4/XSLT.pm
index cd53e313f1..780789d79d 100644
--- a/C4/XSLT.pm
+++ b/C4/XSLT.pm
@@ -138,11 +138,23 @@ sub XSLTParse4Display {
     my $record = transformMARCXML4XSLT($biblionumber, $orig_record);
     #return $record->as_formatted();
     my $itemsxml  = buildKohaItemsNamespace($biblionumber);
-    my $xmlrecord = $record->as_xml();
-    $xmlrecord =~ s/\<\/record\>/$itemsxml\<\/record\>/;
+    my $xmlrecord = $record->as_xml(C4::Context->preference('marcflavour'));
+    my $sysxml = "";
+    warn $xmlrecord;
+    foreach my $syspref ( qw/OPACURLOpenInNewWindow DisplayOPACiconsXSLT URLLinkText/ ) {
+        if (C4::Context->preference( $syspref ) ){
+        $sysxml .= "<syspref name=\"$syspref\">" .
+                   C4::Context->preference( $syspref ) .
+                   "</syspref>\n";
+        }
+    }
+    $sysxml = "<sysprefs>\n".$sysxml."</sysprefs>\n" if length($sysxml);
+    $xmlrecord =~ s/\<\/record\>/$itemsxml$sysxml\<\/record\>/;
+    $xmlrecord =~ s/\& /\&amp\; /;
+
     my $parser = XML::LibXML->new();
     # don't die when you find &, >, etc
-    $parser->recover_silently(1);
+    $parser->recover_silently(0);
     my $source = $parser->parse_string($xmlrecord);
     unless ( $stylesheet->{$xslfilename} ) {
         my $xslt = XML::LibXSLT->new();
-- 
2.39.5