From b9258aa903efe89ad3272d54f549b8d8e7d7d8bd Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Thu, 12 Oct 2023 11:44:16 +0200 Subject: [PATCH] Bug 35043: Use Locale::PO->quote We have our own "quote_po" sub that is stating that Locale::PO::quote is buggy because it does not deal with quoting new lines correctly. However it seems that it is fixed now. Ideally we could use Locale::PO::quote everywhere, but it does not escape tab characters: $string =~ s/\\(?!t)/\\\\/g; # \t is a tab This means the following: msgid "Tabulation (\\t)" msgstr "Tabulation (\\t)" becomes: -msgid "Tabulation (\t)" -msgstr "Tabulation (\t)" And we are seeing the following on Weblate: https://snipboard.io/BjQmDC.jpg Note that Locale::PO has not been updated since 2014... The real problem behind this is that we have 2 methods to quote strings. At first glance it seems that Locale::PO::quote was not used before, but with the introduction of the koha-i18n project we will have scripts that will use Locale::PO->save_file_fromarray, which uses Locale::PO->quote => Those scripts will be used on the translation server for post processing (security reason, marking potential XSS strings as fuzzy). Test plan: 0. Do not apply the patch 1. gulp po:update --lang LANG # Replace LANG with your favorite language code 2. git commit -a -m"init PO files" 3. Apply this patch 4. Repeat step 1. 5. git diff => The change is about the "Tabulation" and "New line" strings from tools/csv-profiles.tt 6. Translate them (replace the \t and \n with %s) and remove the fuzzy flag 7. install the template: cd misc/translator && perl translate install LANG 8. Enable the language, use it and go to the "Nouveau profil CSV" view => Notice that the \t and \n are correctly displayed. 
Signed-off-by: Owen Leonard Signed-off-by: Marcel de Rooy --- misc/translator/TmplTokenizer.pm | 9 --------- misc/translator/tmpl_process3.pl | 2 +- misc/translator/xgettext.pl | 10 +++------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/misc/translator/TmplTokenizer.pm b/misc/translator/TmplTokenizer.pm index 05cc54ea6b..1e95c54879 100644 --- a/misc/translator/TmplTokenizer.pm +++ b/misc/translator/TmplTokenizer.pm @@ -461,15 +461,6 @@ sub trim { return wantarray? (substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s; } -sub quote_po { - my($s) = @_; - # Locale::PO->quote is buggy, it doesn't quote newlines :-/ - $s =~ s/([\\"])/\\$1/gs; - $s =~ s/\n/\\n/g; - #$s =~ s/[\177-\377]/ sprintf("\\%03o", ord($&)) /egs; - return "\"$s\""; -} - sub charset_canon { my($charset) = @_; $charset = uc($charset); diff --git a/misc/translator/tmpl_process3.pl b/misc/translator/tmpl_process3.pl index ddb02e7957..6417226d9b 100755 --- a/misc/translator/tmpl_process3.pl +++ b/misc/translator/tmpl_process3.pl @@ -40,7 +40,7 @@ sub find_translation { if ($s =~ /\S/s) { $key = TmplTokenizer::string_canon($key); $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out); - $key = TmplTokenizer::quote_po($key); + $key = Locale::PO->quote($key); } if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){ if ($s =~ /^(\s+)/){ diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl index 0ebf93ebc3..fcb89f8cf2 100755 --- a/misc/translator/xgettext.pl +++ b/misc/translator/xgettext.pl @@ -265,13 +265,9 @@ EOF } printf $OUTPUT "#, c-format\n" if $cformat_p; - printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po( - TmplTokenizer::string_canon( - TmplTokenizer::charset_convert( $t, $charset_in, $charset_out ) - ) - ); - printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}? 
- TmplTokenizer::quote_po( $translation{$t} ): "\"\""); + my $msgid = TmplTokenizer::string_canon( TmplTokenizer::charset_convert( $t, $charset_in, $charset_out ) ); + printf $OUTPUT "msgid %s\n", ( defined $msgid && length $msgid ? Locale::PO->quote($msgid) : q{""} ); + printf $OUTPUT "msgstr %s\n\n", ( defined $translation{$t} ? Locale::PO->quote( $translation{$t} ) : q{""} ); } } -- 2.20.1