From 55107741a2c9f6e9f870f3043b7e20160cfeed3e Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Fri, 14 Mar 2014 17:09:09 +0100 Subject: [PATCH] Bug 11944: replace use of utf8 with Encode See the wiki page for the explanation. Signed-off-by: Paola Rossi Signed-off-by: Bernardo Gonzalez Kriegel Signed-off-by: Dobrica Pavlinusic Signed-off-by: Martin Renvoize Signed-off-by: Tomas Cohen Arazi --- C4/Biblio.pm | 7 ++----- C4/Charset.pm | 7 ++++--- C4/Installer.pm | 7 +++---- C4/ItemType.pm | 5 +++-- C4/Output.pm | 2 -- C4/Search.pm | 6 +++--- Koha/Template/Plugin/EncodeUTF8.pm | 4 ++-- cataloguing/value_builder/macles.pl | 4 ---- reports/guided_reports.pl | 3 ++- serials/serials-edit.pl | 3 ++- t/Charset.t | 14 ++++++++------ 11 files changed, 29 insertions(+), 33 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index 8bdb25f1f4..290efeca9e 100644 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -23,7 +23,7 @@ use strict; use warnings; use Carp; -# use utf8; +use Encode qw( decode ); use MARC::Record; use MARC::File::USMARC; use MARC::File::XML; @@ -2336,9 +2336,6 @@ sub TransformHtmlToXml { @$values[$i] =~ s/"/&quot;/g; @$values[$i] =~ s/'/&apos;/g; - # if ( !utf8::is_utf8( @$values[$i] ) ) { - # utf8::decode( @$values[$i] ); - # } if ( ( @$tags[$i] ne $prevtag ) ) { $j++ unless ( @$tags[$i] eq "" ); my $indicator1 = eval { substr( @$indicator[$j], 0, 1 ) }; @@ -2473,7 +2470,7 @@ sub TransformHtmlToMarc { foreach my $param_name ( keys %$cgi_params ) { if ( $param_name =~ /^tag_/ ) { my $param_value = $cgi_params->{$param_name}; - if ( utf8::decode($param_value) ) { + if ( $param_value = Encode::decode('UTF-8', $param_value) ) { $cgi_params->{$param_name} = $param_value; } diff --git a/C4/Charset.pm b/C4/Charset.pm index 6bfd935d83..11dde8724d 100644 --- a/C4/Charset.pm +++ b/C4/Charset.pm @@ -25,6 +25,7 @@ use Text::Iconv; use C4::Context; use C4::Debug; use Unicode::Normalize; +use Encode qw( decode encode is_utf8 ); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 
@@ -111,8 +112,8 @@ will assume that this situation occur does not very often. sub IsStringUTF8ish { my $str = shift; - return 1 if utf8::is_utf8($str); - return utf8::decode($str); + return 1 if Encode::is_utf8($str); + return Encode::decode('UTF-8', $str); } =head2 SetUTF8Flag @@ -180,7 +181,7 @@ Sample code : sub NormalizeString{ my ($string,$nfd,$transform)=@_; return $string unless defined($string); # force scalar context return. - utf8::decode($string) unless (utf8::is_utf8($string)); + $string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string)); if ($nfd){ $string= NFD($string); } diff --git a/C4/Installer.pm b/C4/Installer.pm index 300697a326..58fd6e8ae2 100644 --- a/C4/Installer.pm +++ b/C4/Installer.pm @@ -20,6 +20,7 @@ package C4::Installer; use strict; #use warnings; FIXME - Bug 2505 +use Encode qw( encode is_utf8 ); our $VERSION = 3.07.00.049; use C4::Context; use C4::Installer::PerlModules; @@ -136,8 +137,7 @@ sub marc_framework_sql_list { open my $fh, "<:encoding(UTF-8)", "$dir/$requirelevel/$name.txt"; my $lines = <$fh>; $lines =~ s/\n|\r/<br \/>/g;
- use utf8; - utf8::encode($lines) unless ( utf8::is_utf8($lines) ); + $lines = Encode::encode('UTF-8', $lines) unless ( Encode::is_utf8($lines) ); my $mandatory = ($requirelevel =~ /(mandatory|requi|oblig|necess)/i); push @frameworklist, { @@ -214,8 +214,7 @@ sub sample_data_sql_list { open my $fh , "<:encoding(UTF-8)", "$dir/$requirelevel/$name.txt"; my $lines = <$fh>; $lines =~ s/\n|\r/<br \/>/g;
- use utf8; - utf8::encode($lines) unless ( utf8::is_utf8($lines) ); + $lines = Encode::encode('UTF-8', $lines) unless ( Encode::is_utf8($lines) ); my $mandatory = ($requirelevel =~ /(mandatory|requi|oblig|necess)/i); push @frameworklist, { diff --git a/C4/ItemType.pm b/C4/ItemType.pm index ad253a7741..cf61d40cbf 100644 --- a/C4/ItemType.pm +++ b/C4/ItemType.pm @@ -21,6 +21,7 @@ package C4::ItemType; use strict; use warnings; use C4::Context; +use Encode qw( encode ); our $AUTOLOAD; @@ -81,7 +82,7 @@ sub all { for ( @{$dbh->selectall_arrayref( "SELECT * FROM itemtypes ORDER BY description", { Slice => {} })} ) { - utf8::encode($_->{description}); + $_->{description} = Encode::encode('UTF-8', $_->{description}); push @itypes, $class->new($_); } return @itypes; @@ -105,7 +106,7 @@ sub get { "SELECT * FROM itemtypes WHERE itemtype = ?", undef, $itemtype ); if ( $data->{description} ) { - utf8::encode($data->{description}); + $data->{description} = Encode::encode('UTF-8', $data->{description}); } return $class->new($data); } diff --git a/C4/Output.pm b/C4/Output.pm index d09bba74c2..b7c9556da1 100644 --- a/C4/Output.pm +++ b/C4/Output.pm @@ -284,8 +284,6 @@ sub output_with_http_headers { # We can't encode here, that will double encode our templates, and xslt # We need to fix the encoding as it comes out of the database, or when we pass the variables to templates - -# utf8::encode($data) if utf8::is_utf8($data); $data =~ s/\&amp\;amp\; /\&amp\; /g; print $query->header($options), $data; diff --git a/C4/Search.pm b/C4/Search.pm index 1326f692bb..044e6e0f71 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -36,7 +36,7 @@ use URI::Escape; use Business::ISBN; use MARC::Record; use MARC::Field; -use utf8; +use Encode qw( decode is_utf8 ); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG); # set the version for version checking @@ -1982,8 +1982,8 @@ sub searchResults { my @repl = $marcrecord->field($1)->subfield($2); my $subfieldvalue = $repl[$i]; - if (! 
utf8::is_utf8($subfieldvalue)) { - utf8::decode($subfieldvalue); + if (! Encode::is_utf8($subfieldvalue)) { + $subfieldvalue = Encode::decode('UTF-8', $subfieldvalue); } $newline =~ s/\[$tag\]/$subfieldvalue/g; diff --git a/Koha/Template/Plugin/EncodeUTF8.pm b/Koha/Template/Plugin/EncodeUTF8.pm index d27ccec273..871cbb4673 100644 --- a/Koha/Template/Plugin/EncodeUTF8.pm +++ b/Koha/Template/Plugin/EncodeUTF8.pm @@ -21,11 +21,11 @@ use Modern::Perl; use base qw{Template::Plugin::Filter}; -use Encode qw{encode}; +use Encode qw{encode is_utf8}; sub filter { my ( $self, $value ) = @_; - return is_utf8( $value ) ? encode( 'UTF-8', $value ) : $value; + return Encode::is_utf8( $value ) ? Encode::encode( 'UTF-8', $value ) : $value; } 1; diff --git a/cataloguing/value_builder/macles.pl b/cataloguing/value_builder/macles.pl index 6644e65ef7..26aea400ab 100755 --- a/cataloguing/value_builder/macles.pl +++ b/cataloguing/value_builder/macles.pl @@ -84,10 +84,6 @@ my ($input) = @_; my @innerloop; my (%numbers,%cells,@colhdr,@rowhdr,@multiplelines,@lists,$table); while (my $tab = $rq->fetchrow_hashref){ -# if (! utf8::is_utf8($tab->{lib})) { -# utf8::decode($tab->{lib}); -# } -# warn $tab->{lib}; my $number=substr($tab->{authorised_value},0,1); if ($tab->{authorised_value}=~/[0-9]XX/){ $numbers{$number}->{'hdr_tab'}=$tab->{lib}; diff --git a/reports/guided_reports.pl b/reports/guided_reports.pl index 981816daad..784e62e966 100755 --- a/reports/guided_reports.pl +++ b/reports/guided_reports.pl @@ -20,6 +20,7 @@ use Modern::Perl; use CGI qw/-utf8/; use Text::CSV::Encoded; +use Encode qw( decode ); use URI::Escape; use File::Temp; use File::Basename qw( dirname ); @@ -915,7 +916,7 @@ sub header_cell_values { foreach my $c (@{$sth->{NAME}}) { # TODO in Bug 11944 #FIXME apparently DBI still needs a utf8 fix for this? 
- utf8::decode($c); + $c = Encode::decode('UTF-8', $c); push @cols, $c; } return @cols; diff --git a/serials/serials-edit.pl b/serials/serials-edit.pl index 2eeed10923..54c8ccd212 100755 --- a/serials/serials-edit.pl +++ b/serials/serials-edit.pl @@ -64,6 +64,7 @@ op can be : use strict; use warnings; use CGI; +use Encode qw( decode is_utf8 ); use C4::Auth; use C4::Dates qw/format_date format_date_in_iso/; use C4::Biblio; @@ -202,7 +203,7 @@ if ( $op and $op eq 'serialchangestatus' ) { # Convert serialseqs to UTF-8 to prevent encoding problems foreach my $seq (@serialseqs) { - utf8::decode($seq) unless utf8::is_utf8($seq); + $seq = Encode::decode('UTF-8', $seq) unless Encode::is_utf8($seq); } my $newserial; diff --git a/t/Charset.t b/t/Charset.t index 6afc1e8f4e..09ba5336b9 100755 --- a/t/Charset.t +++ b/t/Charset.t @@ -18,6 +18,8 @@ use Modern::Perl; use Test::More tests => 16; +use Encode qw( is_utf8 ); + use MARC::Record; use utf8; @@ -40,9 +42,9 @@ my $octets = "abc"; ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (ASCII)"); $octets = "flamb\c3\a9"; -ok(!utf8::is_utf8($octets), "verify that string does not have Perl UTF-8 flag on"); +ok(!Encode::is_utf8($octets), "verify that string does not have Perl UTF-8 flag on"); ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (LATIN SMALL LETTER E WITH ACUTE)"); -ok(!utf8::is_utf8($octets), "verify that IsStringUTF8ish does not magically turn Perl UTF-8 flag on"); +ok(!Encode::is_utf8($octets), "verify that IsStringUTF8ish does not magically turn Perl UTF-8 flag on"); $octets = "a\xc2" . 
"c"; ok(!IsStringUTF8ish($octets), "verify octets are not valid UTF-8"); @@ -57,14 +59,14 @@ $record->append_fields( MARC::Field->new('245', ' ', ' ', a => 'Rayuela'), ); # Verify our data serves its purpose -ok( !utf8::is_utf8($record->subfield('100','a')) && - !utf8::is_utf8($record->subfield('245','a')), +ok( !Encode::is_utf8($record->subfield('100','a')) && + !Encode::is_utf8($record->subfield('245','a')), 'Verify that the subfields are NOT set the UTF-8 flag yet' ); SetUTF8Flag($record); -ok( utf8::is_utf8($record->subfield('100','a')) && - utf8::is_utf8($record->subfield('245','a')), +ok( Encode::is_utf8($record->subfield('100','a')) && + Encode::is_utf8($record->subfield('245','a')), 'SetUTF8Flag sets the UTF-8 flag to all subfields' ); is( nsb_clean("˜Leœ Moyen Âge"), "Le Moyen Âge", "nsb_clean removes ˜ and œ" ); -- 2.39.5