From 4b337b107e16d47ab79571b9a38d273c9e980b20 Mon Sep 17 00:00:00 2001
From: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
Date: Fri, 6 Oct 2023 16:48:57 +0200
Subject: [PATCH] Bug 34959: Sort PO files correctly

The PO files are not sorted when we update them, which leads to
unnecessary changes being committed: they make the real differences
hard to see and make the git index grow superfluously.

Test plan:
0. Do not apply this patch
1. gulp po:update --lang es-ES
2. git commit -a -m"First PO update"
3. Run again the gulp update command
4. git diff
=> You have a lot of changes generated here, the po:update is not
idempotent.

5. Apply this patch
6. Run the gulp update command
7. git commit -a -m"PO update after 34959"
8. Run the gulp update command
9. git diff
=> No changes are generated

Note that this patch will sort all the entries by file, and then by
line number.
It also fixes a bug that occurs under some conditions, when we add
information/context about the string. For instance, search for "For the
first occurrence" in the file. Prior to this patch this was not correct:
we didn't add info about the first occurrence (but about whichever
occurrence happened to be first in the list).

Signed-off-by: David Nind <david@davidnind.com>

Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
---
 gulpfile.js                 |   8 +--
 misc/translator/xgettext.pl | 137 ++++++++++++++++++++----------------
 2 files changed, 80 insertions(+), 65 deletions(-)

diff --git a/gulpfile.js b/gulpfile.js
index 6b3ab44cf8..95f557c5c7 100644
--- a/gulpfile.js
+++ b/gulpfile.js
@@ -144,7 +144,7 @@ const poTypes = Object.keys(poTasks);
 
 function po_extract_marc (type) {
     return src(`koha-tmpl/*-tmpl/*/en/**/*${type}*`, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', `Koha-marc-${type}.pot`))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', `Koha-marc-${type}.pot`))
         .pipe(dest('misc/translator'))
 }
 
@@ -163,7 +163,7 @@ function po_extract_staff () {
     ];
 
     return src(globs, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-staff-prog.pot'))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-staff-prog.pot'))
         .pipe(dest('misc/translator'))
 }
 
@@ -179,7 +179,7 @@ function po_extract_opac () {
     ];
 
     return src(globs, { read: false, nocase: true })
-        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -s', 'Koha-opac-bootstrap.pot'))
+        .pipe(xgettext('misc/translator/xgettext.pl --charset=UTF-8 -F', 'Koha-opac-bootstrap.pot'))
         .pipe(dest('misc/translator'))
 }
 
@@ -284,7 +284,7 @@ function po_create_installer_marc21 ()  { return po_create_type('installer-MARC2
 function po_create_installer_unimarc () { return po_create_type('installer-UNIMARC') }
 
 function po_update_type (type) {
-    const msgmerge_opts = '--backup=off --quiet --sort-output --update';
+    const msgmerge_opts = '--backup=off --quiet -F --update';
     const cmd = `msgmerge ${msgmerge_opts} <%= file.path %> misc/translator/Koha-${type}.pot`;
     const languages = getLanguages();
     const globs = languages.map(language => `misc/translator/po/${language}-${type}.po`);
diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl
index b5bd96d34f..0ebf93ebc3 100755
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@@ -190,73 +190,88 @@ msgstr ""
 
 EOF
     my $directory_re = quotemeta("$directory/");
-    for my $t (string_list) {
-	if ($text{$t}->[0]->type == C4::TmplTokenType::TEXT_PARAMETRIZED) {
-	    my($token, $n) = ($text{$t}->[0], 0);
-        printf $OUTPUT "#. For the first occurrence,\n"
-		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
-	    for my $param ($token->parameters_and_fields) {
-		$n += 1;
-		my $type = $param->type;
-		my $subtype = ($type == C4::TmplTokenType::TAG
-			&& $param->string =~ /^<input\b/is?
-				$param->attributes->{'type'}->[1]: undef);
-		my $fmt = TmplTokenizer::_formalize( $param );
-		$fmt =~ s/^%/%$n\$/;
-		if ($type == C4::TmplTokenType::DIRECTIVE) {
-#		    $type = "Template::Toolkit Directive";
-		    $type = $param->string =~ /\[%(.*?)%\]/is? $1: 'ERROR';
-		    my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is?
-			    $2: undef;
-            printf $OUTPUT "#. %s: %s\n", $fmt,
-			"$type" . (defined $name? " name=$name": '');
-		} else {
-		    my $name = $param->attributes->{'name'};
-            my $value;
-            $value = $param->attributes->{'value'}
-			    unless $subtype =~ /^(?:text)$/;
-            printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
-			    . (defined $name?  " name=$name->[1]": '')
-			    . (defined $value? " value=$value->[1]": '');
-		}
-	    }
-	} elsif ($text{$t}->[0]->type == C4::TmplTokenType::TAG) {
-	    my($token) = ($text{$t}->[0]);
-        printf $OUTPUT "#. For the first occurrence,\n"
-		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
-	    if ($token->string =~ /^<meta\b/is) {
-		my $type = $token->attributes->{'http-equiv'}->[1];
-        print $OUTPUT "#. META http-equiv=$type\n" if defined $type;
-	    } elsif ($token->string =~ /^<([a-z0-9]+)/is) {
-		my $tag = uc($1);
-		my $type = (lc($tag) eq 'input'?
-			$token->attributes->{'type'}: undef);
-		my $name = $token->attributes->{'name'};
-        printf $OUTPUT "#. %s\n", $tag
-		    . (defined $type? " type=$type->[1]": '')
-		    . (defined $name? " name=$name->[1]": '');
-	    }
-	} elsif ($text{$t}->[0]->has_js_data) {
-        printf $OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
-        printf $OUTPUT "#. SCRIPT\n";
-	}
-	my $cformat_p;
-	for my $token (@{$text{$t}}) {
-	    my $pathname = $token->pathname;
-	    $pathname =~ s/^$directory_re//os;
-        $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/;
-        printf $OUTPUT "#: %s:%d\n", $pathname, $token->line_number
-		    if defined $pathname && defined $token->line_number;
-	    $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
-	}
+
+    for my $t ( keys %text ) {
+        my @ordered_tokens = sort {
+                   $a->pathname cmp $b->pathname
+                || $a->line_number cmp $b->line_number
+        } @{$text{$t}};
+        my $token = $ordered_tokens[0];
+
+        if ( $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED ) {
+            my $n = 0;
+            printf $OUTPUT "#. For the first occurrence,\n"
+                if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0;
+            for my $param ( $token->parameters_and_fields ) {
+                $n += 1;
+                my $type    = $param->type;
+                my $subtype = ( $type == C4::TmplTokenType::TAG
+                        && $param->string =~ /^<input\b/is?
+                        $param->attributes->{'type'}->[1] : undef );
+                my $fmt = TmplTokenizer::_formalize($param);
+                $fmt =~ s/^%/%$n\$/;
+                if ( $type == C4::TmplTokenType::DIRECTIVE ) {
+
+                    #		    $type = "Template::Toolkit Directive";
+                    $type = $param->string =~ /\[%(.*?)%\]/is ? $1 : 'ERROR';
+                    my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is?
+                        $2: undef;
+                    printf $OUTPUT "#. %s: %s\n", $fmt,
+                        "$type" . ( defined $name ? " name=$name" : '' );
+                } else {
+                    my $name = $param->attributes->{'name'};
+                    my $value;
+                    $value = $param->attributes->{'value'}
+                        unless $subtype =~ /^(?:text)$/;
+                    printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
+                        . ( defined $name  ? " name=$name->[1]"   : '' )
+                        . ( defined $value ? " value=$value->[1]" : '' );
+                }
+            }
+        } elsif ( $token->type == C4::TmplTokenType::TAG ) {
+            printf $OUTPUT "#. For the first occurrence,\n"
+                if @{ $text{$t} } > 1 && $token->parameters_and_fields > 0;
+            if ( $token->string =~ /^<meta\b/is ) {
+                my $type = $token->attributes->{'http-equiv'}->[1];
+                print $OUTPUT "#. META http-equiv=$type\n" if defined $type;
+            } elsif ( $token->string =~ /^<([a-z0-9]+)/is ) {
+                my $tag  = uc($1);
+                my $type = ( lc($tag) eq 'input'?
+                    $token->attributes->{'type'}: undef );
+                my $name = $token->attributes->{'name'};
+                printf $OUTPUT "#. %s\n", $tag
+                   . (defined $type? " type=$type->[1]": '')
+                   . (defined $name? " name=$name->[1]": '');
+            }
+        } elsif ( $token->has_js_data ) {
+            printf $OUTPUT "#. For the first occurrence,\n" if @{ $text{$t} } > 1;
+            printf $OUTPUT "#. SCRIPT\n";
+        }
+        my $cformat_p;
+        my $location = {};
+        for my $token ( @{ $text{$t} } ) {
+            my $pathname = $token->pathname;
+            $pathname =~ s/^$directory_re//os;
+            $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/;
+            push @{ $location->{$pathname} }, $token->line_number
+                if defined $pathname && defined $token->line_number;
+            $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
+        }
+
+        for my $pathname ( sort keys %$location ) {
+            for my $line_number ( @{ $location->{$pathname} } ) {
+                printf $OUTPUT "#: %s:%d\n", $pathname, $line_number;
+            }
+        }
+
         printf $OUTPUT "#, c-format\n" if $cformat_p;
         printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po(
             TmplTokenizer::string_canon(
-                TmplTokenizer::charset_convert($t, $charset_in, $charset_out)
+                TmplTokenizer::charset_convert( $t, $charset_in, $charset_out )
             )
         );
         printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
-		TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
+            TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
     }
 }
 
-- 
2.39.2