From 578745dcbec7897ea28bcb9c75c2d90a441ed2bb Mon Sep 17 00:00:00 2001 From: Jonathan Druart Date: Fri, 6 Oct 2023 16:48:57 +0200 Subject: [PATCH] Bug 34959: Sort PO files correctly The PO files are not sorted when we update them, which leads to unnecessary changes being committed: the real differences are hard to see and the git index grows needlessly. Test plan: 0. Do not apply this patch 1. gulp po:update --lang es-ES 2. git commit -a -m"First PO update" 3. Run the gulp update command again 4. git diff => You have a lot of changes generated here, the po:update is not idempotent. 5. Apply this patch 6. Run the gulp update command 7. git commit -a -m"PO update after 34959" 8. Run the gulp update command 9. git diff => No changes are generated Note that this patch will sort all the entries by file, and then by line number. It also fixes a bug that occurs under some conditions, where we add information/context about the string. For instance search for "For the first occurrence" in the file. Prior to this patch this was not correct: we did not add info about the first occurrence, but about whichever occurrence happened to come first in the list. 
Signed-off-by: David Nind Signed-off-by: Kyle M Hall --- gulpfile.js | 8 +-- misc/translator/xgettext.pl | 137 ++++++++++++++++++++---------------- 2 files changed, 80 insertions(+), 65 deletions(-) diff --git a/gulpfile.js b/gulpfile.js index 6b3ab44cf8..95f557c5c7 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -144,7 +144,7 @@ const poTypes = Object.keys(poTasks); function po_extract_marc (type) { return src(`koha-tmpl/*-tmpl/*/en/**/*${type}*`, { read: false, nocase: true }) - .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', `Koha-marc-${type}.pot`)) + .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', `Koha-marc-${type}.pot`)) .pipe(dest('misc/translator')) } @@ -163,7 +163,7 @@ function po_extract_staff () { ]; return src(globs, { read: false, nocase: true }) - .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-staff-prog.pot')) + .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-staff-prog.pot')) .pipe(dest('misc/translator')) } @@ -179,7 +179,7 @@ function po_extract_opac () { ]; return src(globs, { read: false, nocase: true }) - .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-opac-bootstrap.pot')) + .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-opac-bootstrap.pot')) .pipe(dest('misc/translator')) } @@ -284,7 +284,7 @@ function po_create_installer_marc21 () { return po_create_type('installer-MARC2 function po_create_installer_unimarc () { return po_create_type('installer-UNIMARC') } function po_update_type (type) { - const msgmerge_opts = '--backup=off --quiet --sort-output --update'; + const msgmerge_opts = '--backup=off --quiet -F --update'; const cmd = `msgmerge ${msgmerge_opts} <%= file.path %> misc/translator/Koha-${type}.pot`; const languages = getLanguages(); const globs = languages.map(language => `misc/translator/po/${language}-${type}.po`); diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl index 
b5bd96d34f..0ebf93ebc3 100755 --- a/misc/translator/xgettext.pl +++ b/misc/translator/xgettext.pl @@ -190,73 +190,88 @@ msgstr "" EOF my $directory_re = quotemeta("$directory/"); - for my $t (string_list) { - if ($text{$t}->[0]->type == C4::TmplTokenType::TEXT_PARAMETRIZED) { - my($token, $n) = ($text{$t}->[0], 0); - printf $OUTPUT "#. For the first occurrence,\n" - if @{$text{$t}} > 1 && $token->parameters_and_fields > 0; - for my $param ($token->parameters_and_fields) { - $n += 1; - my $type = $param->type; - my $subtype = ($type == C4::TmplTokenType::TAG - && $param->string =~ /^attributes->{'type'}->[1]: undef); - my $fmt = TmplTokenizer::_formalize( $param ); - $fmt =~ s/^%/%$n\$/; - if ($type == C4::TmplTokenType::DIRECTIVE) { -# $type = "Template::Toolkit Directive"; - $type = $param->string =~ /\[%(.*?)%\]/is? $1: 'ERROR'; - my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is? - $2: undef; - printf $OUTPUT "#. %s: %s\n", $fmt, - "$type" . (defined $name? " name=$name": ''); - } else { - my $name = $param->attributes->{'name'}; - my $value; - $value = $param->attributes->{'value'} - unless $subtype =~ /^(?:text)$/; - printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype" - . (defined $name? " name=$name->[1]": '') - . (defined $value? " value=$value->[1]": ''); - } - } - } elsif ($text{$t}->[0]->type == C4::TmplTokenType::TAG) { - my($token) = ($text{$t}->[0]); - printf $OUTPUT "#. For the first occurrence,\n" - if @{$text{$t}} > 1 && $token->parameters_and_fields > 0; - if ($token->string =~ /^attributes->{'http-equiv'}->[1]; - print $OUTPUT "#. META http-equiv=$type\n" if defined $type; - } elsif ($token->string =~ /^<([a-z0-9]+)/is) { - my $tag = uc($1); - my $type = (lc($tag) eq 'input'? - $token->attributes->{'type'}: undef); - my $name = $token->attributes->{'name'}; - printf $OUTPUT "#. %s\n", $tag - . (defined $type? " type=$type->[1]": '') - . (defined $name? 
" name=$name->[1]": ''); - } - } elsif ($text{$t}->[0]->has_js_data) { - printf $OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1; - printf $OUTPUT "#. SCRIPT\n"; - } - my $cformat_p; - for my $token (@{$text{$t}}) { - my $pathname = $token->pathname; - $pathname =~ s/^$directory_re//os; - $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/; - printf $OUTPUT "#: %s:%d\n", $pathname, $token->line_number - if defined $pathname && defined $token->line_number; - $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED; - } + + for my $t ( keys %text ) { + my @ordered_tokens = sort { + $a->pathname cmp $b->pathname + || $a->line_number cmp $b->line_number + } @{$text{$t}}; + my $token = $ordered_tokens[0]; + + if ( $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED ) { + my $n = 0; + printf $OUTPUT "#. For the first occurrence,\n" + if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0; + for my $param ( $token->parameters_and_fields ) { + $n += 1; + my $type = $param->type; + my $subtype = ( $type == C4::TmplTokenType::TAG + && $param->string =~ /^attributes->{'type'}->[1] : undef ); + my $fmt = TmplTokenizer::_formalize($param); + $fmt =~ s/^%/%$n\$/; + if ( $type == C4::TmplTokenType::DIRECTIVE ) { + + # $type = "Template::Toolkit Directive"; + $type = $param->string =~ /\[%(.*?)%\]/is ? $1 : 'ERROR'; + my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is? + $2: undef; + printf $OUTPUT "#. %s: %s\n", $fmt, + "$type" . ( defined $name ? " name=$name" : '' ); + } else { + my $name = $param->attributes->{'name'}; + my $value; + $value = $param->attributes->{'value'} + unless $subtype =~ /^(?:text)$/; + printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype" + . ( defined $name ? " name=$name->[1]" : '' ) + . ( defined $value ? " value=$value->[1]" : '' ); + } + } + } elsif ( $token->type == C4::TmplTokenType::TAG ) { + printf $OUTPUT "#. 
For the first occurrence,\n" + if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0; + if ( $token->string =~ /^attributes->{'http-equiv'}->[1]; + print $OUTPUT "#. META http-equiv=$type\n" if defined $type; + } elsif ( $token->string =~ /^<([a-z0-9]+)/is ) { + my $tag = uc($1); + my $type = ( lc($tag) eq 'input'? + $token->attributes->{'type'}: undef ); + my $name = $token->attributes->{'name'}; + printf $OUTPUT "#. %s\n", $tag + . (defined $type? " type=$type->[1]": '') + . (defined $name? " name=$name->[1]": ''); + } + } elsif ( $token->has_js_data ) { + printf $OUTPUT "#. For the first occurrence,\n" if @{ $text{$t} } > 1; + printf $OUTPUT "#. SCRIPT\n"; + } + my $cformat_p; + my $location = {}; + for my $token ( @{ $text{$t} } ) { + my $pathname = $token->pathname; + $pathname =~ s/^$directory_re//os; + $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/; + push @{ $location->{$pathname} }, $token->line_number + if defined $pathname && defined $token->line_number; + $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED; + } + + for my $pathname ( sort keys %$location ) { + for my $line_number ( @{ $location->{$pathname} } ) { + printf $OUTPUT "#: %s:%d\n", $pathname, $line_number; + } + } + printf $OUTPUT "#, c-format\n" if $cformat_p; printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po( TmplTokenizer::string_canon( - TmplTokenizer::charset_convert($t, $charset_in, $charset_out) + TmplTokenizer::charset_convert( $t, $charset_in, $charset_out ) ) ); printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}? - TmplTokenizer::quote_po( $translation{$t} ): "\"\""); + TmplTokenizer::quote_po( $translation{$t} ): "\"\""); } } -- 2.39.5