From d542740ab8877234d043c8a11cb19a6004e72656 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sun, 10 Jun 2012 15:18:42 +0100 Subject: [PATCH] Bug 6554 - make Koha internally utf-8 clean The current implementation (mostly commented out in this patch) uses a heuristic to guess which strings need decoding from utf-8 to binary representation, doesn't support utf-8 characters in templates, and has problems with utf-8 data from the database. With these changes, Koha perl code always uses utf-8 encoding correctly. All incoming data from the database is already correctly marked as utf-8, and decoding of utf8 is required only for Zebra and XSLT transfers, which don't set the utf-8 flag correctly. For output, the standard perl :encoding(utf8) handler is used, so it also removes various "wide character" warnings as a side-effect. Test scenario: 1. make sure that you have utf-8 characters in your biblio records, patrons, categories etc. 2. try to search records on intranet and opac which contain utf-8 characters 3. install a language which has utf-8 characters, e.g. uk-UA dpavlin@koha-dev:/srv/koha/misc/translator(bug_6554) $ PERL5LIB=/srv/koha/ perl translate install uk-UA 4. switch language to uk-UA and verify that templates display correctly 5. test search and Z39.50 search and verify that characters are correct Signed-off-by: Owen Leonard I followed the test plan, adding utf-8 characters to library names, patron categories, titles, and authorized values. I tried the uk-UA translation and everything looked good. When performing Z39.50 searches for titles containing utf-8 characters I got results which were still occasionally contaminated with dummy characters [?] but I assume this is Z39.50's fault not the patch's. Signed-off-by: Marcel de Rooy Signed-off-by: Bernardo Gonzalez Kriegel Already signed, add mine. 
Signed-off-by: Jared Camins-Esakov --- C4/Context.pm | 4 ++- C4/ItemType.pm | 2 +- C4/Output.pm | 1 + C4/Search.pm | 25 ++++++++++--------- C4/Templates.pm | 44 ++------------------------------- admin/preferences.pl | 1 + admin/z3950servers.pl | 2 +- authorities/authorities-home.pl | 2 +- catalogue/search.pl | 2 +- cataloguing/addbiblio.pl | 2 +- cataloguing/additem.pl | 2 +- cataloguing/z3950_search.pl | 2 +- members/member.pl | 2 +- opac/opac-search.pl | 4 +-- reports/guided_reports.pl | 2 +- reserve/renewscript.pl | 2 +- serials/routing-preview.pl | 2 +- serials/routing.pl | 2 +- 18 files changed, 35 insertions(+), 68 deletions(-) diff --git a/C4/Context.pm b/C4/Context.pm index 7709f7effb..4d170b2fb6 100644 --- a/C4/Context.pm +++ b/C4/Context.pm @@ -1104,7 +1104,9 @@ set_userenv is called in Auth.pm #' sub set_userenv { - my ($usernum, $userid, $usercnum, $userfirstname, $usersurname, $userbranch, $branchname, $userflags, $emailaddress, $branchprinter, $persona)= @_; + my ($usernum, $userid, $usercnum, $userfirstname, $usersurname, $userbranch, $branchname, $userflags, $emailaddress, $branchprinter, $persona)= + map { utf8::decode($_); $_ } # CGI::Session doesn't handle utf-8, so we decode it here + @_; my $var=$context->{"activeuser"} || ''; my $cell = { "number" => $usernum, diff --git a/C4/ItemType.pm b/C4/ItemType.pm index 648cff95f5..fffa0a280d 100644 --- a/C4/ItemType.pm +++ b/C4/ItemType.pm @@ -81,7 +81,7 @@ sub all { for ( @{$dbh->selectall_arrayref( "SELECT * FROM itemtypes ORDER BY description", { Slice => {} })} ) { - utf8::encode($_->{description}); +# utf8::encode($_->{description}); push @itypes, $class->new($_); } return @itypes; diff --git a/C4/Output.pm b/C4/Output.pm index 6e2c89a87a..458b92f040 100644 --- a/C4/Output.pm +++ b/C4/Output.pm @@ -308,6 +308,7 @@ sub output_with_http_headers { } sub output_html_with_http_headers { + binmode( STDOUT, ":encoding(utf8)" ); my ( $query, $cookie, $data, $status ) = @_; output_with_http_headers( 
$query, $cookie, $data, 'html', $status ); } diff --git a/C4/Search.pm b/C4/Search.pm index d3d3ce1d73..d4ce9fb008 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -484,6 +484,7 @@ sub getRecords { # not an index scan else { $record = $results[ $i - 1 ]->record($j)->raw(); + utf8::decode( $record ); # warn "RECORD $j:".$record; $results_hash->{'RECORDS'}[$j] = $record; @@ -501,6 +502,7 @@ sub getRecords { for ( my $j = 0 ; $j < $jmax ; $j++ ) { my $render_record = $results[ $i - 1 ]->record($j)->render(); + utf8::decode($render_record); my @used_datas = (); foreach my $tag ( @{ $facet->{tags} } ) { @@ -714,6 +716,7 @@ sub pazGetRecords { for (my $i = 0; $i < $count; $i++) { # FIXME -- may need to worry about diacritics here my $rec = $paz->record($recid, $i); + utf8::decode( $rec ); push @{ $result_group->{'RECORDS'} }, $rec; } @@ -1290,17 +1293,17 @@ sub buildQuery { if ( @limits ) { $q .= ' and '.join(' and ', @limits); } - return ( undef, $q, $q, "q=ccl=".uri_escape($q), $q, '', '', '', '', 'ccl' ); + return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $q, '', '', '', '', 'ccl' ); } if ( $query =~ /^cql=/ ) { - return ( undef, $', $', "q=cql=".uri_escape($'), $', '', '', '', '', 'cql' ); + return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', '', 'cql' ); } if ( $query =~ /^pqf=/ ) { if ($query_desc) { - $query_cgi = "q=".uri_escape($query_desc); + $query_cgi = "q=".uri_escape_utf8($query_desc); } else { $query_desc = $'; - $query_cgi = "q=pqf=".uri_escape($'); + $query_cgi = "q=pqf=".uri_escape_utf8($'); } return ( undef, $', $', $query_cgi, $query_desc, '', '', '', '', 'pqf' ); } @@ -1472,9 +1475,9 @@ sub buildQuery { $query .= " $operators[$i-1] "; $query .= " $index_plus " unless $indexes_set; $query .= " $operand"; - $query_cgi .= "&op=".uri_escape($operators[$i-1]); - $query_cgi .= "&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; + $query_cgi .= 
"&op=".uri_escape_utf8($operators[$i-1]); + $query_cgi .= "&idx=".uri_escape_utf8($index) if $index; + $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i]; $query_desc .= " $operators[$i-1] $index_plus $operands[$i]"; } @@ -1484,8 +1487,8 @@ sub buildQuery { $query .= " and "; $query .= "$index_plus " unless $indexes_set; $query .= "$operand"; - $query_cgi .= "&op=and&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; + $query_cgi .= "&op=and&idx=".uri_escape_utf8($index) if $index; + $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i]; $query_desc .= " and $index_plus $operands[$i]"; } } @@ -1497,8 +1500,8 @@ sub buildQuery { $query .= " $index_plus " unless $indexes_set; $query .= $operand; $query_desc .= " $index_plus $operands[$i]"; - $query_cgi .= "&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; + $query_cgi .= "&idx=".uri_escape_utf8($index) if $index; + $query_cgi .= "&q=".uri_escape_utf8($operands[$i]) if $operands[$i]; $previous_operand = 1; } } #/if $operands diff --git a/C4/Templates.pm b/C4/Templates.pm index 88b9b01687..7f50143279 100644 --- a/C4/Templates.pm +++ b/C4/Templates.pm @@ -67,6 +67,7 @@ sub new { COMPILE_DIR => C4::Context->config('template_cache_dir')?C4::Context->config('template_cache_dir'):'',, INCLUDE_PATH => \@includes, FILTERS => {}, + ENCODING => 'utf8', # templates don't have BOM, see Template::FAQ } ) or die Template->error(); my $self = { @@ -114,57 +115,16 @@ sub output { C4::Context->preference('opaclayoutstylesheet'); # add variables set via param to $vars for processing - # and clean any utf8 mess for my $k ( keys %{ $self->{VARS} } ) { $vars->{$k} = $self->{VARS}->{$k}; - if (ref($vars->{$k}) eq 'ARRAY'){ - utf8_arrayref($vars->{$k}); - } - elsif (ref($vars->{$k}) eq 'HASH'){ - utf8_hashref($vars->{$k}); - } - else { - utf8::encode($vars->{$k}) if utf8::is_utf8($vars->{$k}); - } } my 
$data; -# binmode( STDOUT, ":utf8" ); $template->process( $self->filename, $vars, \$data ) || die "Template process failed: ", $template->error(); return $data; } -sub utf8_arrayref { - my $arrayref = shift; - foreach my $element (@$arrayref){ - if (ref($element) eq 'ARRAY'){ - utf8_arrayref($element); - next; - } - if (ref($element) eq 'HASH'){ - utf8_hashref($element); - next; - } - utf8::encode($element) if utf8::is_utf8($element); - } -} - -sub utf8_hashref { - my $hashref = shift; - for my $key (keys %{$hashref}){ - if (ref($hashref->{$key}) eq 'ARRAY'){ - utf8_arrayref($hashref->{$key}); - next; - } - if (ref($hashref->{$key}) eq 'HASH'){ - utf8_hashref($hashref->{$key}); - next; - } - utf8::encode($hashref->{$key}) if utf8::is_utf8($hashref->{$key}); - } -} - - + # FIXME - this is a horrible hack to cache # the current known-good language, temporarily # put in place to resolve bug 4403. It is diff --git a/admin/preferences.pl b/admin/preferences.pl index db91e76e7d..e24657e0cc 100755 --- a/admin/preferences.pl +++ b/admin/preferences.pl @@ -34,6 +34,7 @@ use File::Spec; use IO::File; use YAML::Syck qw(); $YAML::Syck::ImplicitTyping = 1; +$YAML::Syck::ImplicitUnicode = 1; # force utf-8 for preference encoding our $lang; # use Smart::Comments; diff --git a/admin/z3950servers.pl b/admin/z3950servers.pl index 92cc3e59f6..897b55235c 100755 --- a/admin/z3950servers.pl +++ b/admin/z3950servers.pl @@ -21,7 +21,7 @@ use strict; use warnings; -use CGI; +use CGI qw( -utf8 ); use C4::Context; use C4::Auth; use C4::Output; diff --git a/authorities/authorities-home.pl b/authorities/authorities-home.pl index f67f5c9c40..2fe4622d64 100755 --- a/authorities/authorities-home.pl +++ b/authorities/authorities-home.pl @@ -125,7 +125,7 @@ if ( $op eq "do_search" ) { # next/previous would not work anymore # construction of the url of each page - my $value_url = uri_escape($value); + my $value_url = uri_escape_utf8($value); my $base_url = "authorities-home.pl?" 
."marclist=$marclist" ."&and_or=$and_or" diff --git a/catalogue/search.pl b/catalogue/search.pl index 8624b3b43a..88e6825f18 100755 --- a/catalogue/search.pl +++ b/catalogue/search.pl @@ -154,7 +154,7 @@ use C4::Branch; # GetBranches my $DisplayMultiPlaceHold = C4::Context->preference("DisplayMultiPlaceHold"); # create a new CGI object # FIXME: no_undef_params needs to be tested -use CGI qw('-no_undef_params'); +use CGI qw( -no_undef_params -utf8 ); my $cgi = new CGI; my ($template,$borrowernumber,$cookie); diff --git a/cataloguing/addbiblio.pl b/cataloguing/addbiblio.pl index 3314f1a02a..e2ea1cdf89 100755 --- a/cataloguing/addbiblio.pl +++ b/cataloguing/addbiblio.pl @@ -880,7 +880,7 @@ if ( $op eq "addbiblio" ) { .'&frameworkcode='.$frameworkcode .'&circborrowernumber='.$fa_circborrowernumber .'&branch='.$fa_branch - .'&barcode='.uri_escape($fa_barcode) + .'&barcode='.uri_escape_utf8($fa_barcode) .'&stickyduedate='.$fa_stickyduedate .'&duedatespec='.$fa_duedatespec ); diff --git a/cataloguing/additem.pl b/cataloguing/additem.pl index a8a8649369..aa7095d250 100755 --- a/cataloguing/additem.pl +++ b/cataloguing/additem.pl @@ -517,7 +517,7 @@ if ($op eq "additem") { print $input->redirect( '/cgi-bin/koha/circ/circulation.pl?' 
.'borrowernumber='.$fa_circborrowernumber - .'&barcode='.uri_escape($fa_barcode) + .'&barcode='.uri_escape_utf8($fa_barcode) .'&duedatespec='.$fa_duedatespec .'&stickyduedate=1' ); diff --git a/cataloguing/z3950_search.pl b/cataloguing/z3950_search.pl index b8e8a58e66..05f5f2b899 100755 --- a/cataloguing/z3950_search.pl +++ b/cataloguing/z3950_search.pl @@ -20,7 +20,7 @@ use strict; use warnings; -use CGI; +use CGI qw( -utf8 ); use C4::Auth; use C4::Output; diff --git a/members/member.pl b/members/member.pl index d705810cfb..55f6a5bedd 100755 --- a/members/member.pl +++ b/members/member.pl @@ -27,7 +27,7 @@ use strict; #use warnings; FIXME - Bug 2505 use C4::Auth; use C4::Output; -use CGI; +use CGI qw( -utf8 ); use C4::Members; use C4::Branch; use C4::Category; diff --git a/opac/opac-search.pl b/opac/opac-search.pl index 9a58bccdc1..3ca9d1d5d5 100755 --- a/opac/opac-search.pl +++ b/opac/opac-search.pl @@ -60,7 +60,7 @@ use Business::ISBN; my $DisplayMultiPlaceHold = C4::Context->preference("DisplayMultiPlaceHold"); # create a new CGI object # FIXME: no_undef_params needs to be tested -use CGI qw('-no_undef_params'); +use CGI qw( -no_undef_params -utf8 ); my $cgi = new CGI; my $branch_group_limit = $cgi->param("branch_group_limit"); @@ -652,7 +652,7 @@ for (my $i=0;$i<@servers;$i++) { $newsearchcookie = $cgi->cookie( -name => 'KohaOpacRecentSearches', # We uri_escape the whole freezed structure so we're sure we won't have any encoding problems - -value => uri_escape(freeze(\@recentSearches)), + -value => uri_escape_utf8(freeze(\@recentSearches)), -expires => '' ); $cookie = [$cookie, $newsearchcookie]; diff --git a/reports/guided_reports.pl b/reports/guided_reports.pl index 0171cb46a0..c25767a2fa 100755 --- a/reports/guided_reports.pl +++ b/reports/guided_reports.pl @@ -665,7 +665,7 @@ elsif ($phase eq 'Run this report'){ my $totpages = int($total/$limit) + (($total % $limit) > 0 ? 
1 : 0); my $url = "/cgi-bin/koha/reports/guided_reports.pl?reports=$report_id&phase=Run%20this%20report&limit=$limit"; if (@sql_params) { - $url = join('&sql_params=', $url, map { URI::Escape::uri_escape($_) } @sql_params); + $url = join('&sql_params=', $url, map { URI::Escape::uri_escape_utf8($_) } @sql_params); } $template->param( 'results' => \@rows, diff --git a/reserve/renewscript.pl b/reserve/renewscript.pl index dbdced9c80..232db8bb89 100755 --- a/reserve/renewscript.pl +++ b/reserve/renewscript.pl @@ -127,7 +127,7 @@ foreach my $barcode (@barcodes) { # redirection to the referrer page # if ( $input->param('destination') eq "circ" ) { - $cardnumber = uri_escape($cardnumber); + $cardnumber = uri_escape_utf8($cardnumber); print $input->redirect( '/cgi-bin/koha/circ/circulation.pl?findborrower=' . $cardnumber . $failedrenews diff --git a/serials/routing-preview.pl b/serials/routing-preview.pl index 437c85288b..9869f1c925 100755 --- a/serials/routing-preview.pl +++ b/serials/routing-preview.pl @@ -127,7 +127,7 @@ $routingnotes =~ s/\n/\
/g; $template->param( title => $subs->{'bibliotitle'}, issue => $issue, - issue_escaped => URI::Escape::uri_escape($issue), + issue_escaped => URI::Escape::uri_escape_utf8($issue), subscriptionid => $subscriptionid, memberloop => $memberloop, routingnotes => $routingnotes, diff --git a/serials/routing.pl b/serials/routing.pl index a95631bdf6..5dee5eafbb 100755 --- a/serials/routing.pl +++ b/serials/routing.pl @@ -62,7 +62,7 @@ if($op eq 'add'){ if($op eq 'save'){ my $sth = $dbh->prepare('UPDATE serial SET routingnotes = ? WHERE subscriptionid = ?'); $sth->execute($notes,$subscriptionid); - my $urldate = URI::Escape::uri_escape($date_selected); + my $urldate = URI::Escape::uri_escape_utf8($date_selected); print $query->redirect("routing-preview.pl?subscriptionid=$subscriptionid&issue=$urldate"); } -- 2.39.5