Koha/misc/translator/tmpl_process3.pl
Jonathan Druart 8c0d196ff6
Bug 39077: Do not complain about obsolete translations
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
Signed-off-by: Katrin Fischer <katrin.fischer@bsz-bw.de>
2025-02-13 15:31:01 +01:00

510 lines
17 KiB
Perl
Executable file

#!/usr/bin/perl
# This file is part of Koha
# Parts copyright 2003-2004 Paul Poulain
# Parts copyright 2003-2004 Jerome Vizcaino
# Parts copyright 2004 Ambrose Li
use FindBin;
use lib $FindBin::Bin;
=head1 NAME
tmpl_process3.pl - Alternative version of tmpl_process.pl
using gettext-compatible translation files
=cut
use strict;
#use warnings; FIXME - Bug 2505
use File::Basename qw( fileparse );
use Getopt::Long qw( GetOptions );
use Locale::PO;
use TmplTokenizer;
use VerboseWarnings qw( pedantic_p warn_additional warn_normal warn_pedantic error_additional error_normal );
###############################################################################
use vars qw( @in_dirs @filenames @match @nomatch $str_file $out_dir $quiet );
use vars qw( @excludes $exclude_regex );
use vars qw( $recursive_p );
use vars qw( $pedantic_p );
use vars qw( $href );
use vars qw( $type ); # file extension (DOS form without the dot) to match
use vars qw( $charset_in $charset_out );
###############################################################################
sub find_translation {
my ($s) = @_;
my $key = $s;
if ( $s =~ /\S/s ) {
$key = TmplTokenizer::string_canon($key);
$key = TmplTokenizer::charset_convert( $key, $charset_in, $charset_out );
$key = Locale::PO->quote($key);
}
if ( defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote( $href->{$key}->msgstr ) ) {
if ( $s =~ /^(\s+)/ ) {
return $1 . Locale::PO->dequote( $href->{$key}->msgstr );
} else {
return Locale::PO->dequote( $href->{$key}->msgstr );
}
} else {
return $s;
}
}
sub text_replace_tag {
my ( $t, $attr ) = @_;
my $it;
my @ttvar;
# value [tag=input], meta
my $tag = ( $t =~ /^<(\S+)/s ) ? lc($1) : undef;
my $translated_p = 0;
for my $a ( 'alt', 'content', 'title', 'value', 'label', 'placeholder', 'aria-label' ) {
if ( $attr->{$a} ) {
next if $a eq 'label' && $tag ne 'optgroup';
next if $a eq 'content' && $tag ne 'meta';
next
if $a eq 'value'
&& ( $tag ne 'input'
|| ( ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio)$/ ) ); # FIXME
my ( $key, $val, $val_orig, $order ) = @{ $attr->{$a} }; #FIXME
if ( $val =~ /\S/s ) {
# for selected attributes replace '[%..%]' with '%s' and remember matches
if ( $a =~ /title|value|alt|content|placeholder|aria-label/ ) {
while ( $val =~ s/(\[\%.*?\%\])/\%s/ ) {
my $var = $1;
push @ttvar, $1;
}
}
# find translation for transformed attributes
my $s = find_translation($val);
# replace '%s' with original content (in order) on translated string, this is fragile!
if ( $a =~ /title|value|alt|content|placeholder|aria-label/ and @ttvar ) {
while (@ttvar) {
my $var = shift @ttvar;
$s =~ s/\%s/$var/;
}
}
if ( $attr->{$a}->[1] ne $s ) { #FIXME
$attr->{$a}->[1] = $s; # FIXME
$attr->{$a}->[2] = ( $s =~ /"/s ) ? "'$s'" : "\"$s\""; #FIXME
$translated_p = 1;
}
}
}
}
if ($translated_p) {
$it = "<$tag" . join(
'',
map {
if ( $_ ne '/' ) {
sprintf( ' %s="%s"', $_, $attr->{$_}->[1] );
} else {
sprintf( ' %s', $_ );
}
} sort {
$attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
|| $a cmp $b # Sort attributes BZ 22236
} keys %$attr
);
$it .= '>';
} else {
$it = $t;
}
return $it;
}
sub text_replace {
my ( $h, $output ) = @_;
for ( ; ; ) {
my $s = TmplTokenizer::next_token($h);
last unless defined $s;
my ( $kind, $t, $attr ) = ( $s->type, $s->string, $s->attributes );
if ( $kind eq C4::TmplTokenType::TEXT ) {
print $output find_translation($t);
} elsif ( $kind eq C4::TmplTokenType::TEXT_PARAMETRIZED ) {
my $fmt = find_translation( $s->form );
print $output TmplTokenizer::parametrize(
$fmt, 1, $s,
sub {
$_ = $_[0];
my ( $kind, $t, $attr ) = ( $_->type, $_->string, $_->attributes );
$kind == C4::TmplTokenType::TAG && %$attr ? text_replace_tag( $t, $attr ) : $t;
}
);
} elsif ( $kind eq C4::TmplTokenType::TAG && %$attr ) {
print $output text_replace_tag( $t, $attr );
} elsif ( $s->has_js_data ) {
for my $t ( @{ $s->js_data } ) {
# FIXME for this whole block
if ( $t->[0] ) {
printf $output "%s%s%s", $t->[2], find_translation( $t->[3] ),
$t->[2];
} else {
print $output $t->[1];
}
}
} elsif ( defined $t ) {
# Quick fix to bug 4472
$t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/;
print $output $t;
}
}
}
sub listfiles {
my ( $dir, $type, $action ) = @_;
my $filenames = join( '|', @filenames ); # used to update strings from this file
my $match = join( '|', @match ); # use only this files
my $nomatch = join( '|', @nomatch ); # do no use this files
my @it = ();
my $dir_h;
if ( opendir( $dir_h, $dir ) ) {
my @dirent = readdir $dir_h; # because $dir_h is shared when recursing
closedir $dir_h;
for my $dirent (@dirent) {
my $path = "$dir/$dirent";
if ( $dirent =~ /^\./
|| $dirent eq 'CVS'
|| $dirent eq 'RCS'
|| ( defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/ ) )
{
;
} elsif ( -f $path ) {
my $basename = fileparse($path);
push @it, $path
if ( not @filenames or $basename =~ /($filenames)/i )
and ( not @match or $basename =~ /($match)/i ) # files to include
and ( not @nomatch or $basename !~ /($nomatch)/i ) # files not to include
and ( !defined $type || $dirent =~ /\.(?:$type)$/ ) || $action eq 'install';
} elsif ( -d $path && $recursive_p ) {
push @it, listfiles( $path, $type, $action );
}
}
} else {
warn_normal( "$dir: $!", undef );
}
return @it;
}
###############################################################################
sub mkdir_recursive {
my ($dir) = @_;
local ( $`, $&, $', $1 );
$dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
my ( $prefix, $basename ) = ( $dir =~ /\/([^\/]+)$/s ) ? ( $`, $1 ) : ( '.', $dir );
mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
if ( !-d $dir ) {
print STDERR "Making directory $dir...\n" unless $quiet;
# creates with rwxrwxr-x permissions
mkdir( $dir, 0775 ) || warn_normal( "$dir: $!", undef );
}
}
###############################################################################
sub usage {
my ($exitcode) = @_;
my $h = $exitcode ? *STDERR : *STDOUT;
print $h <<EOF;
Usage: $0 install [OPTION]
or: $0 --help
Install translated templates.
-i, --input=SOURCE Get or update strings from SOURCE directory(s).
On create or update can have multiple values.
On install only one value.
-o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
--pedantic-warnings Issue warnings even for detected problems
which are likely to be harmless
-r, --recursive SOURCE in the -i option is a directory
-f, --filename=FILE FILE is a specific filename or part of it.
If given, only these files will be processed.
On update only relevant strings will be updated.
-m, --match=FILE FILE is a specific filename or part of it.
If given, only these files will be processed.
-n, --nomatch=FILE FILE is a specific filename or part of it.
If given, these files will not be processed.
-s, --str-file=FILE Specify FILE as the translation (po) file
for input (install) or output (create, update)
-x, --exclude=REGEXP Exclude dirs matching the given REGEXP
--help Display this help and exit
-q, --quiet no output to screen (except for errors)
Try `perldoc $0` for perhaps more information.
EOF
exit($exitcode);
}
###############################################################################
sub usage_error {
for my $msg ( split( /\n/, $_[0] ) ) {
print STDERR "$msg\n";
}
print STDERR "Try `$0 --help for more information.\n";
exit(-1);
}
###############################################################################
GetOptions(
'input|i=s' => \@in_dirs,
'filename|f=s' => \@filenames,
'match|m=s' => \@match,
'nomatch|n=s' => \@nomatch,
'outputdir|o=s' => \$out_dir,
'recursive|r' => \$recursive_p,
'str-file|s=s' => \$str_file,
'exclude|x=s' => \@excludes,
'quiet|q' => \$quiet,
'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
'help' => \&usage,
) || usage_error();
VerboseWarnings::set_application_name($0);
VerboseWarnings::set_pedantic_mode($pedantic_p);
my $action = shift or usage_error('You must specify an ACTION.');
usage_error('You must at least specify input and string list filenames.')
if !@in_dirs || !defined $str_file;
# Type match defaults to *.tt plus *.inc if not specified
$type = "tt|inc|xsl|xml|def" if !defined($type);
# Check the inputs for being directories
for my $in_dir (@in_dirs) {
usage_error( "$in_dir: Input must be a directory.\n" . "(Symbolic links are not supported at the moment)" )
unless -d $in_dir;
}
# Generates the global exclude regular expression
$exclude_regex = '(?:' . join( '|', @excludes ) . ')' if @excludes;
my @in_files;
# Generate the list of input files if a directory is specified
# input is a directory, generates list of files to process
for my $fn (@filenames) {
die "You cannot specify input files and directories at the same time.\n"
if -d $fn;
}
for my $in_dir (@in_dirs) {
$in_dir =~ s/\/$//; # strips the trailing / if any
@in_files = ( @in_files, listfiles( $in_dir, $type, $action ) );
}
# restores the string list from file
$href = Locale::PO->load_file_ashash( $str_file, 'utf-8' );
# guess the charsets. HTML::Templates defaults to iso-8859-1
if ( defined $href ) {
die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
$charset_out = TmplTokenizer::charset_canon($2) if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
$charset_in = $charset_out;
# for my $msgid (keys %$href) {
# if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
# my $candidate = TmplTokenizer::charset_canon $2;
# die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
# if defined $charset_in && $charset_in ne $candidate;
# $charset_in = $candidate;
# }
# }
# BUG6464: check consistency of PO messages
# - count number of '%s' in msgid and msgstr
for my $msg ( values %$href ) {
my $id_count = split( /%s/, $msg->{msgid} ) - 1;
my $str_count = split( /%s/, $msg->{msgstr} ) - 1;
next
if $id_count == $str_count
|| $msg->{msgstr} eq '""'
|| $msg->{obsolete}
|| grep { /fuzzy/ } @{ $msg->{_flags} };
warn_normal( "unconsistent %s count: ($id_count/$str_count):\n"
. " line: "
. $msg->{loaded_line_number} . "\n"
. " msgid: "
. $msg->{msgid} . "\n"
. " msgstr: "
. $msg->{msgstr}
. "\n", undef );
}
}
# set our charset in to UTF-8
if ( !defined $charset_in ) {
$charset_in = TmplTokenizer::charset_canon('UTF-8');
warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n" unless ($quiet);
}
# set our charset out to UTF-8
if ( !defined $charset_out ) {
$charset_out = TmplTokenizer::charset_canon('UTF-8');
warn "Warning: Charset Out defaulting to $charset_out\n" unless ($quiet);
}
my $st;
if ( $action eq 'install' ) {
if ( !defined($out_dir) ) {
usage_error("You must specify an output directory when using the install method.");
}
if ( scalar @in_dirs > 1 ) {
usage_error("You must specify only one input directory when using the install method.");
}
my $in_dir = shift @in_dirs;
if ( $in_dir eq $out_dir ) {
warn "You must specify a different input and output directory.\n";
exit -1;
}
# Make sure the output directory exists
# (It will auto-create it, but for compatibility we should not)
-d $out_dir || die "$out_dir: The directory does not exist\n";
# Try to open the file, because Locale::PO doesn't check :-/
open( my $fh, '<', $str_file ) || die "$str_file: $!\n";
close $fh;
# creates the new tmpl file using the new translation
for my $input (@in_files) {
die "Assertion failed"
unless substr( $input, 0, length($in_dir) + 1 ) eq "$in_dir/";
my $target = $out_dir . substr( $input, length($in_dir) );
my $targetdir = ( $target =~ /[^\/]+$/s ) ? $` : undef;
if ( !defined $type || $input =~ /\.(?:$type)$/ ) {
my $h = TmplTokenizer->new($input);
$h->set_allow_cformat(1);
VerboseWarnings::set_input_file_name($input);
mkdir_recursive($targetdir) unless -d $targetdir;
print STDERR "Creating $target...\n" unless $quiet;
open( my $fh, ">:encoding(UTF-8)", "$target" ) || die "$target: $!\n";
text_replace( $h, $fh );
close $fh;
} else {
# just copying the file
mkdir_recursive($targetdir) unless -d $targetdir;
system("cp -f '$input' '$target'") == 0 or warn "Cannot copy $input to $target";
print STDERR "Copying $input...\n" unless $quiet;
}
}
} else {
usage_error('Unknown action specified.');
}
if ( $st == 0 ) {
printf "The %s seems to be successful.\n", $action unless $quiet;
} else {
printf "%s FAILED.\n", "\u$action" unless $quiet;
}
exit 0;
###############################################################################
=head1 SYNOPSIS
./tmpl_process3.pl [ I<tmpl_process.pl options> ]
=head1 DESCRIPTION
This is an alternative version of the tmpl_process.pl script,
using standard gettext-style PO files. While there still might
be changes made to the way it extracts strings, at this moment
it should be stable enough for general use; it is already being
used for the Chinese and Polish translations.
Currently, the create, update, and install actions have all been
reimplemented and seem to work.
=head2 Features
=over
=item -
Translation files in standard Uniforum PO format.
All standard tools including all gettext tools,
plus PO file editors like kbabel(1) etc.
can be used.
=item -
Minor changes in whitespace in source templates
do not generally require strings to be re-translated.
=item -
Able to handle <TMPL_VAR> variables in the templates;
<TMPL_VAR> variables are usually extracted in proper context,
represented by a short %s placeholder.
=item -
Able to handle text input and radio button INPUT elements
in the templates; these INPUT elements are also usually
extracted in proper context,
represented by a short %S or %p placeholder.
=item -
Automatic comments in the generated PO files to provide
even more context (line numbers, and the names and types
of the variables).
=item -
The %I<n>$s (or %I<n>$p, etc.) notation can be used
for change the ordering of the variables,
if such a reordering is required for correct translation.
=item -
If a particular <TMPL_VAR> should not appear in the
translation, it can be suppressed with the %0.0s notation.
=item -
Using the PO format also means translators can add their
own comments in the translation files, if necessary.
=back
=head1 NOTES
Anchors are represented by an <AI<n>> notation.
The meaning of this non-standard notation might not be obvious.
=head1 BUGS
This script may not work in Windows.
There are probably some other bugs too, since this has not been
tested very much.
=head1 SEE ALSO
TmplTokenizer.pm,
Locale::PO(3),
=cut