Browse Source

Bug 11944: replace use of utf8 with Encode

See the wiki page for the explanation.

Signed-off-by: Paola Rossi <paola.rossi@cineca.it>
Signed-off-by: Bernardo Gonzalez Kriegel <bgkriegel@gmail.com>
Signed-off-by: Dobrica Pavlinusic <dpavlin@rot13.org>

Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@gmail.com>
3.20.x
Jonathan Druart 10 years ago
committed by Tomas Cohen Arazi
parent
commit
55107741a2
  1. 7
      C4/Biblio.pm
  2. 7
      C4/Charset.pm
  3. 7
      C4/Installer.pm
  4. 5
      C4/ItemType.pm
  5. 2
      C4/Output.pm
  6. 6
      C4/Search.pm
  7. 4
      Koha/Template/Plugin/EncodeUTF8.pm
  8. 4
      cataloguing/value_builder/macles.pl
  9. 3
      reports/guided_reports.pl
  10. 3
      serials/serials-edit.pl
  11. 14
      t/Charset.t

7
C4/Biblio.pm

@ -23,7 +23,7 @@ use strict;
use warnings;
use Carp;
# use utf8;
use Encode qw( decode );
use MARC::Record;
use MARC::File::USMARC;
use MARC::File::XML;
@ -2336,9 +2336,6 @@ sub TransformHtmlToXml {
@$values[$i] =~ s/"/&quot;/g;
@$values[$i] =~ s/'/&apos;/g;
# if ( !utf8::is_utf8( @$values[$i] ) ) {
# utf8::decode( @$values[$i] );
# }
if ( ( @$tags[$i] ne $prevtag ) ) {
$j++ unless ( @$tags[$i] eq "" );
my $indicator1 = eval { substr( @$indicator[$j], 0, 1 ) };
@ -2473,7 +2470,7 @@ sub TransformHtmlToMarc {
foreach my $param_name ( keys %$cgi_params ) {
if ( $param_name =~ /^tag_/ ) {
my $param_value = $cgi_params->{$param_name};
if ( utf8::decode($param_value) ) {
if ( $param_value = Encode::decode('UTF-8', $param_value) ) {
$cgi_params->{$param_name} = $param_value;
}

7
C4/Charset.pm

@ -25,6 +25,7 @@ use Text::Iconv;
use C4::Context;
use C4::Debug;
use Unicode::Normalize;
use Encode qw( decode encode is_utf8 );
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ -111,8 +112,8 @@ will assume that this situation does not occur very often.
sub IsStringUTF8ish {
my $str = shift;
return 1 if utf8::is_utf8($str);
return utf8::decode($str);
return 1 if Encode::is_utf8($str);
return Encode::decode('UTF-8', $str);
}
=head2 SetUTF8Flag
@ -180,7 +181,7 @@ Sample code :
sub NormalizeString{
my ($string,$nfd,$transform)=@_;
return $string unless defined($string); # force scalar context return.
utf8::decode($string) unless (utf8::is_utf8($string));
$string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string));
if ($nfd){
$string= NFD($string);
}

7
C4/Installer.pm

@ -20,6 +20,7 @@ package C4::Installer;
use strict;
#use warnings; FIXME - Bug 2505
use Encode qw( encode is_utf8 );
our $VERSION = 3.07.00.049;
use C4::Context;
use C4::Installer::PerlModules;
@ -136,8 +137,7 @@ sub marc_framework_sql_list {
open my $fh, "<:encoding(UTF-8)", "$dir/$requirelevel/$name.txt";
my $lines = <$fh>;
$lines =~ s/\n|\r/<br \/>/g;
use utf8;
utf8::encode($lines) unless ( utf8::is_utf8($lines) );
$lines = Encode::encode('UTF-8', $lines) unless ( Encode::is_utf8($lines) );
my $mandatory = ($requirelevel =~ /(mandatory|requi|oblig|necess)/i);
push @frameworklist,
{
@ -214,8 +214,7 @@ sub sample_data_sql_list {
open my $fh , "<:encoding(UTF-8)", "$dir/$requirelevel/$name.txt";
my $lines = <$fh>;
$lines =~ s/\n|\r/<br \/>/g;
use utf8;
utf8::encode($lines) unless ( utf8::is_utf8($lines) );
$lines = Encode::encode('UTF-8', $lines) unless ( Encode::is_utf8($lines) );
my $mandatory = ($requirelevel =~ /(mandatory|requi|oblig|necess)/i);
push @frameworklist,
{

5
C4/ItemType.pm

@ -21,6 +21,7 @@ package C4::ItemType;
use strict;
use warnings;
use C4::Context;
use Encode qw( encode );
our $AUTOLOAD;
@ -81,7 +82,7 @@ sub all {
for ( @{$dbh->selectall_arrayref(
"SELECT * FROM itemtypes ORDER BY description", { Slice => {} })} )
{
utf8::encode($_->{description});
$_->{description} = Encode::encode('UTF-8', $_->{description});
push @itypes, $class->new($_);
}
return @itypes;
@ -105,7 +106,7 @@ sub get {
"SELECT * FROM itemtypes WHERE itemtype = ?", undef, $itemtype
);
if ( $data->{description} ) {
utf8::encode($data->{description});
$data->{description} = Encode::encode('UTF-8', $data->{description});
}
return $class->new($data);
}

2
C4/Output.pm

@ -284,8 +284,6 @@ sub output_with_http_headers {
# We can't encode here, that will double encode our templates, and xslt
# We need to fix the encoding as it comes out of the database, or when we pass the variables to templates
# utf8::encode($data) if utf8::is_utf8($data);
$data =~ s/\&amp\;amp\; /\&amp\; /g;
print $query->header($options), $data;

6
C4/Search.pm

@ -36,7 +36,7 @@ use URI::Escape;
use Business::ISBN;
use MARC::Record;
use MARC::Field;
use utf8;
use Encode qw( decode is_utf8 );
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
# set the version for version checking
@ -1982,8 +1982,8 @@ sub searchResults {
my @repl = $marcrecord->field($1)->subfield($2);
my $subfieldvalue = $repl[$i];
if (! utf8::is_utf8($subfieldvalue)) {
utf8::decode($subfieldvalue);
if (! Encode::is_utf8($subfieldvalue)) {
$subfieldvalue = Encode::decode('UTF-8', $subfieldvalue);
}
$newline =~ s/\[$tag\]/$subfieldvalue/g;

4
Koha/Template/Plugin/EncodeUTF8.pm

@ -21,11 +21,11 @@ use Modern::Perl;
use base qw{Template::Plugin::Filter};
use Encode qw{encode};
use Encode qw{encode is_utf8};
sub filter {
my ( $self, $value ) = @_;
return is_utf8( $value ) ? encode( 'UTF-8', $value ) : $value;
return Encode::is_utf8( $value ) ? Encode::encode( 'UTF-8', $value ) : $value;
}
1;

4
cataloguing/value_builder/macles.pl

@ -84,10 +84,6 @@ my ($input) = @_;
my @innerloop;
my (%numbers,%cells,@colhdr,@rowhdr,@multiplelines,@lists,$table);
while (my $tab = $rq->fetchrow_hashref){
# if (! utf8::is_utf8($tab->{lib})) {
# utf8::decode($tab->{lib});
# }
# warn $tab->{lib};
my $number=substr($tab->{authorised_value},0,1);
if ($tab->{authorised_value}=~/[0-9]XX/){
$numbers{$number}->{'hdr_tab'}=$tab->{lib};

3
reports/guided_reports.pl

@ -20,6 +20,7 @@
use Modern::Perl;
use CGI qw/-utf8/;
use Text::CSV::Encoded;
use Encode qw( decode );
use URI::Escape;
use File::Temp;
use File::Basename qw( dirname );
@ -915,7 +916,7 @@ sub header_cell_values {
foreach my $c (@{$sth->{NAME}}) {
# TODO in Bug 11944
#FIXME apparently DBI still needs a utf8 fix for this?
utf8::decode($c);
$c = Encode::decode('UTF-8', $c);
push @cols, $c;
}
return @cols;

3
serials/serials-edit.pl

@ -64,6 +64,7 @@ op can be :
use strict;
use warnings;
use CGI;
use Encode qw( decode is_utf8 );
use C4::Auth;
use C4::Dates qw/format_date format_date_in_iso/;
use C4::Biblio;
@ -202,7 +203,7 @@ if ( $op and $op eq 'serialchangestatus' ) {
# Convert serialseqs to UTF-8 to prevent encoding problems
foreach my $seq (@serialseqs) {
utf8::decode($seq) unless utf8::is_utf8($seq);
$seq = Encode::decode('UTF-8', $seq) unless Encode::is_utf8($seq);
}
my $newserial;

14
t/Charset.t

@ -18,6 +18,8 @@
use Modern::Perl;
use Test::More tests => 16;
use Encode qw( is_utf8 );
use MARC::Record;
use utf8;
@ -40,9 +42,9 @@ my $octets = "abc";
ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (ASCII)");
$octets = "flamb\c3\a9";
ok(!utf8::is_utf8($octets), "verify that string does not have Perl UTF-8 flag on");
ok(!Encode::is_utf8($octets), "verify that string does not have Perl UTF-8 flag on");
ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (LATIN SMALL LETTER E WITH ACUTE)");
ok(!utf8::is_utf8($octets), "verify that IsStringUTF8ish does not magically turn Perl UTF-8 flag on");
ok(!Encode::is_utf8($octets), "verify that IsStringUTF8ish does not magically turn Perl UTF-8 flag on");
$octets = "a\xc2" . "c";
ok(!IsStringUTF8ish($octets), "verify octets are not valid UTF-8");
@ -57,14 +59,14 @@ $record->append_fields(
MARC::Field->new('245', ' ', ' ', a => 'Rayuela'),
);
# Verify our data serves its purpose
ok( !utf8::is_utf8($record->subfield('100','a')) &&
!utf8::is_utf8($record->subfield('245','a')),
ok( !Encode::is_utf8($record->subfield('100','a')) &&
!Encode::is_utf8($record->subfield('245','a')),
'Verify that the subfields are NOT set the UTF-8 flag yet' );
SetUTF8Flag($record);
ok( utf8::is_utf8($record->subfield('100','a')) &&
utf8::is_utf8($record->subfield('245','a')),
ok( Encode::is_utf8($record->subfield('100','a')) &&
Encode::is_utf8($record->subfield('245','a')),
'SetUTF8Flag sets the UTF-8 flag to all subfields' );
is( nsb_clean("˜Leœ Moyen Âge"), "Le Moyen Âge", "nsb_clean removes ˜ and œ" );

Loading…
Cancel
Save