Bug 34959: Sort PO files correctly

The PO files are not sorted when we update them, which leads to unnecessary changes being committed: the differences are hard to see and the git index grows needlessly. Test plan: 0. Do not apply this patch 1. gulp po:update --lang es-ES 2. git commit -a -m"First PO update" 3. Run the gulp update command again 4. git diff => You have a lot of changes generated here, the po:update is not idempotent. 5. Apply this patch 6. Run the gulp update command 7. git commit -a -m"PO update after 34959" 8. Run the gulp update command 9. git diff => No changes are generated Note that this patch will sort all the entries by file, and then by line number. It also fixes a bug that occurs in some conditions, where we add information/context about the string. For instance, search for "For the first occurrence" in the file. Prior to this patch this was not correct: we didn't add info about the first occurrence (but about whichever occurrence happened to come first in the list). Signed-off-by: David Nind <david@davidnind.com> Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com> Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
2023-10-06 16:48:57 +02:00 · 2023-10-06 16:48:57 +02:00 · 4b337b107e
commit 4b337b107e
parent e4d638e148
2 changed files with 80 additions and 65 deletions
--- a/gulpfile.js
+++ b/gulpfile.js
@ -144,7 +144,7 @@ const poTypes = Object.keys(poTasks);

 function po_extract_marc (type) {
    return src(`koha-tmpl/*-tmpl/*/en/**/*${type}*`, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', `Koha-marc-${type}.pot`))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', `Koha-marc-${type}.pot`))
        .pipe(dest('misc/translator'))
 }

@ -163,7 +163,7 @@ function po_extract_staff () {
    ];

    return src(globs, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-staff-prog.pot'))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-staff-prog.pot'))
        .pipe(dest('misc/translator'))
 }

@ -179,7 +179,7 @@ function po_extract_opac () {
    ];

    return src(globs, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-opac-bootstrap.pot'))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-opac-bootstrap.pot'))
        .pipe(dest('misc/translator'))
 }

@ -284,7 +284,7 @@ function po_create_installer_marc21 ()  { return po_create_type('installer-MARC2
 function po_create_installer_unimarc () { return po_create_type('installer-UNIMARC') }

 function po_update_type (type) {
-    const msgmerge_opts = '--backup=off --quiet --sort-output --update';
+    const msgmerge_opts = '--backup=off --quiet -F --update';
    const cmd = `msgmerge ${msgmerge_opts} <%= file.path %> misc/translator/Koha-${type}.pot`;
    const languages = getLanguages();
    const globs = languages.map(language => `misc/translator/po/${language}-${type}.po`);
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@ -190,73 +190,88 @@ msgstr ""

 EOF
    my $directory_re = quotemeta("$directory/");
-    for my $t (string_list) {
-	if ($text{$t}->[0]->type == C4::TmplTokenType::TEXT_PARAMETRIZED) {
-	    my($token, $n) = ($text{$t}->[0], 0);
-        printf $OUTPUT "#. For the first occurrence,\n"
-		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
-	    for my $param ($token->parameters_and_fields) {
-		$n += 1;
-		my $type = $param->type;
-		my $subtype = ($type == C4::TmplTokenType::TAG
-			&& $param->string =~ /^<input\b/is?
-				$param->attributes->{'type'}->[1]: undef);
-		my $fmt = TmplTokenizer::_formalize( $param );
-		$fmt =~ s/^%/%$n\$/;
-		if ($type == C4::TmplTokenType::DIRECTIVE) {
-#		    $type = "Template::Toolkit Directive";
-		    $type = $param->string =~ /\[%(.*?)%\]/is? $1: 'ERROR';
-		    my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is?
-			    $2: undef;
-            printf $OUTPUT "#. %s: %s\n", $fmt,
-			"$type" . (defined $name? " name=$name": '');
-		} else {
-		    my $name = $param->attributes->{'name'};
-            my $value;
-            $value = $param->attributes->{'value'}
-			    unless $subtype =~ /^(?:text)$/;
-            printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
-			    . (defined $name?  " name=$name->[1]": '')
-			    . (defined $value? " value=$value->[1]": '');
-		}
-	    }
-	} elsif ($text{$t}->[0]->type == C4::TmplTokenType::TAG) {
-	    my($token) = ($text{$t}->[0]);
-        printf $OUTPUT "#. For the first occurrence,\n"
-		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
-	    if ($token->string =~ /^<meta\b/is) {
-		my $type = $token->attributes->{'http-equiv'}->[1];
-        print $OUTPUT "#. META http-equiv=$type\n" if defined $type;
-	    } elsif ($token->string =~ /^<([a-z0-9]+)/is) {
-		my $tag = uc($1);
-		my $type = (lc($tag) eq 'input'?
-			$token->attributes->{'type'}: undef);
-		my $name = $token->attributes->{'name'};
-        printf $OUTPUT "#. %s\n", $tag
-		    . (defined $type? " type=$type->[1]": '')
-		    . (defined $name? " name=$name->[1]": '');
-	    }
-	} elsif ($text{$t}->[0]->has_js_data) {
-        printf $OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
-        printf $OUTPUT "#. SCRIPT\n";
-	}
-	my $cformat_p;
-	for my $token (@{$text{$t}}) {
-	    my $pathname = $token->pathname;
-	    $pathname =~ s/^$directory_re//os;
-        $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/;
-        printf $OUTPUT "#: %s:%d\n", $pathname, $token->line_number
-		    if defined $pathname && defined $token->line_number;
-	    $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
-	}
+
+    for my $t ( keys %text ) {
+        my @ordered_tokens = sort {
+                   $a->pathname cmp $b->pathname
+                || $a->line_number cmp $b->line_number
+        } @{$text{$t}};
+        my $token = $ordered_tokens[0];
+
+        if ( $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED ) {
+            my $n = 0;
+            printf $OUTPUT "#. For the first occurrence,\n"
+                if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0;
+            for my $param ( $token->parameters_and_fields ) {
+                $n += 1;
+                my $type    = $param->type;
+                my $subtype = ( $type == C4::TmplTokenType::TAG
+                        && $param->string =~ /^<input\b/is?
+                        $param->attributes->{'type'}->[1] : undef );
+                my $fmt = TmplTokenizer::_formalize($param);
+                $fmt =~ s/^%/%$n\$/;
+                if ( $type == C4::TmplTokenType::DIRECTIVE ) {
+
+                    #		    $type = "Template::Toolkit Directive";
+                    $type = $param->string =~ /\[%(.*?)%\]/is ? $1 : 'ERROR';
+                    my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is?
+                        $2: undef;
+                    printf $OUTPUT "#. %s: %s\n", $fmt,
+                        "$type" . ( defined $name ? " name=$name" : '' );
+                } else {
+                    my $name = $param->attributes->{'name'};
+                    my $value;
+                    $value = $param->attributes->{'value'}
+                        unless $subtype =~ /^(?:text)$/;
+                    printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
+                        . ( defined $name  ? " name=$name->[1]"   : '' )
+                        . ( defined $value ? " value=$value->[1]" : '' );
+                }
+            }
+        } elsif ( $token->type == C4::TmplTokenType::TAG ) {
+            printf $OUTPUT "#. For the first occurrence,\n"
+                if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0;
+            if ( $token->string =~ /^<meta\b/is ) {
+                my $type = $token->attributes->{'http-equiv'}->[1];
+                print $OUTPUT "#. META http-equiv=$type\n" if defined $type;
+            } elsif ( $token->string =~ /^<([a-z0-9]+)/is ) {
+                my $tag  = uc($1);
+                my $type = ( lc($tag) eq 'input'?
+                    $token->attributes->{'type'}: undef );
+                my $name = $token->attributes->{'name'};
+                printf $OUTPUT "#. %s\n", $tag
+                   . (defined $type? " type=$type->[1]": '')
+                   . (defined $name? " name=$name->[1]": '');
+            }
+        } elsif ( $token->has_js_data ) {
+            printf $OUTPUT "#. For the first occurrence,\n" if @{ $text{$t} } > 1;
+            printf $OUTPUT "#. SCRIPT\n";
+        }
+        my $cformat_p;
+        my $location = {};
+        for my $token ( @{ $text{$t} } ) {
+            my $pathname = $token->pathname;
+            $pathname =~ s/^$directory_re//os;
+            $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/;
+            push @{ $location->{$pathname} }, $token->line_number
+                if defined $pathname && defined $token->line_number;
+            $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
+        }
+
+        for my $pathname ( sort keys %$location ) {
+            for my $line_number ( @{ $location->{$pathname} } ) {
+                printf $OUTPUT "#: %s:%d\n", $pathname, $line_number;
+            }
+        }
+
        printf $OUTPUT "#, c-format\n" if $cformat_p;
        printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po(
            TmplTokenizer::string_canon(
-                TmplTokenizer::charset_convert($t, $charset_in, $charset_out)
+                TmplTokenizer::charset_convert( $t, $charset_in, $charset_out )
            )
        );
        printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
-		TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
+            TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
    }
 }