2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
9 tmpl_process3.pl - Alternative version of tmpl_process.pl
10 using gettext-compatible translation files
15 #use warnings; FIXME - Bug 2505
19 use File::Temp qw( :POSIX );
21 use VerboseWarnings qw( :warn :die );
23 ###############################################################################
25 use vars qw( @in_dirs @filenames @match @nomatch $str_file $out_dir $quiet );
26 use vars qw( @excludes $exclude_regex );
27 use vars qw( $recursive_p );
28 use vars qw( $pedantic_p );
30 use vars qw( $type ); # file extension (DOS form without the dot) to match
31 use vars qw( $charset_in $charset_out );
33 ###############################################################################
35 sub find_translation ($) {
39 $key = TmplTokenizer::string_canon($key);
40 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
41 $key = TmplTokenizer::quote_po($key);
43 if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){
45 return $1 . Locale::PO->dequote($href->{$key}->msgstr);
48 return Locale::PO->dequote($href->{$key}->msgstr);
56 sub text_replace_tag ($$) {
60 # value [tag=input], meta
61 my $tag = lc($1) if $t =~ /^<(\S+)/s;
63 for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
65 next if $a eq 'label' && $tag ne 'optgroup';
66 next if $a eq 'content' && $tag ne 'meta';
67 next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio)$/)); # FIXME
69 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
71 my $s = find_translation($val);
72 if ($attr->{$a}->[1] ne $s) { #FIXME
73 $attr->{$a}->[1] = $s; # FIXME
74 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
82 . join('', map { if ($_ ne '/'){
83 sprintf(' %s="%s"', $_, $attr->{$_}->[1]);
90 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
100 sub text_replace (**) {
101 my($h, $output) = @_;
103 my $s = TmplTokenizer::next_token $h;
104 last unless defined $s;
105 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
106 if ($kind eq C4::TmplTokenType::TEXT) {
107 print $output find_translation($t);
108 } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
109 my $fmt = find_translation($s->form);
110 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
112 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
113 $kind == C4::TmplTokenType::TAG && %$attr?
114 text_replace_tag($t, $attr): $t });
115 } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
116 print $output text_replace_tag($t, $attr);
117 } elsif ($s->has_js_data) {
118 for my $t (@{$s->js_data}) {
119 # FIXME for this whole block
121 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
124 print $output $t->[1];
127 } elsif (defined $t) {
128 # Quick fix to bug 4472
129 $t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/ ;
136 my($dir, $type, $action) = @_;
137 my $filenames = join ('|', @filenames); # used to update strings from this file
138 my $match = join ('|', @match); # use only this files
139 my $nomatch = join ('|', @nomatch); # do no use this files
141 if (opendir(DIR, $dir)) {
142 my @dirent = readdir DIR; # because DIR is shared when recursing
144 for my $dirent (@dirent) {
145 my $path = "$dir/$dirent";
146 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
147 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
150 my $basename = fileparse( $path );
152 if ( not @filenames or $basename =~ /($filenames)/i )
153 and ( not @match or $basename =~ /($match)/i ) # files to include
154 and ( not @nomatch or $basename !~ /($nomatch)/i ) # files not to include
155 and (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
156 } elsif (-d $path && $recursive_p) {
157 push @it, listfiles($path, $type, $action);
161 warn_normal "$dir: $!", undef;
166 ###############################################################################
168 sub mkdir_recursive ($) {
170 local($`, $&, $', $1);
171 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
172 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
173 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
175 print STDERR "Making directory $dir...\n" unless $quiet;
176 # creates with rwxrwxr-x permissions
177 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
181 ###############################################################################
185 my $h = $exitcode? *STDERR: *STDOUT;
187 Usage: $0 create [OPTION]
188 or: $0 update [OPTION]
189 or: $0 install [OPTION]
191 Create or update PO files from templates, or install translated templates.
193 -i, --input=SOURCE Get or update strings from SOURCE directory(s).
194 On create or update can have multiple values.
195 On install only one value.
196 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
197 --pedantic-warnings Issue warnings even for detected problems
198 which are likely to be harmless
199 -r, --recursive SOURCE in the -i option is a directory
200 -f, --filename=FILE FILE is a specific filename or part of it.
201 If given, only these files will be processed.
202 On update only relevant strings will be updated.
203 -m, --match=FILE FILE is a specific filename or part of it.
204 If given, only these files will be processed.
205 -n, --nomatch=FILE FILE is a specific filename or part of it.
206 If given, these files will not be processed.
207 -s, --str-file=FILE Specify FILE as the translation (po) file
208 for input (install) or output (create, update)
209 -x, --exclude=REGEXP Exclude dirs matching the given REGEXP
210 --help Display this help and exit
211 -q, --quiet no output to screen (except for errors)
213 The -o option is ignored for the "create" and "update" actions.
214 Try `perldoc $0` for perhaps more information.
219 ###############################################################################
221 sub usage_error (;$) {
222 for my $msg (split(/\n/, $_[0])) {
223 print STDERR "$msg\n";
225 print STDERR "Try `$0 --help for more information.\n";
229 ###############################################################################
232 'input|i=s' => \@in_dirs,
233 'filename|f=s' => \@filenames,
234 'match|m=s' => \@match,
235 'nomatch|n=s' => \@nomatch,
236 'outputdir|o=s' => \$out_dir,
237 'recursive|r' => \$recursive_p,
238 'str-file|s=s' => \$str_file,
239 'exclude|x=s' => \@excludes,
240 'quiet|q' => \$quiet,
241 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
245 VerboseWarnings::set_application_name $0;
246 VerboseWarnings::set_pedantic_mode $pedantic_p;
248 # keep the buggy Locale::PO quiet if it says stupid things
249 $SIG{__WARN__} = sub {
251 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
254 my $action = shift or usage_error('You must specify an ACTION.');
255 usage_error('You must at least specify input and string list filenames.')
256 if !@in_dirs || !defined $str_file;
258 # Type match defaults to *.tt plus *.inc if not specified
259 $type = "tt|inc|xsl|xml|def" if !defined($type);
261 # Check the inputs for being directories
262 for my $in_dir ( @in_dirs ) {
263 usage_error("$in_dir: Input must be a directory.\n"
264 . "(Symbolic links are not supported at the moment)")
268 # Generates the global exclude regular expression
269 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
272 # Generate the list of input files if a directory is specified
273 # input is a directory, generates list of files to process
275 for my $fn ( @filenames ) {
276 die "You cannot specify input files and directories at the same time.\n"
279 for my $in_dir ( @in_dirs ) {
280 $in_dir =~ s/\/$//; # strips the trailing / if any
281 @in_files = ( @in_files, listfiles($in_dir, $type, $action));
284 # restores the string list from file
285 $href = Locale::PO->load_file_ashash($str_file);
287 # guess the charsets. HTML::Templates defaults to iso-8859-1
289 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
290 $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
291 $charset_in = $charset_out;
292 # for my $msgid (keys %$href) {
293 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
294 # my $candidate = TmplTokenizer::charset_canon $2;
295 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
296 # if defined $charset_in && $charset_in ne $candidate;
297 # $charset_in = $candidate;
301 # BUG6464: check consistency of PO messages
302 # - count number of '%s' in msgid and msgstr
303 for my $msg ( values %$href ) {
304 my $id_count = split(/%s/, $msg->{msgid}) - 1;
305 my $str_count = split(/%s/, $msg->{msgstr}) - 1;
306 next if $id_count == $str_count ||
307 $msg->{msgstr} eq '""' ||
308 grep { /fuzzy/ } @{$msg->{_flags}};
310 "unconsistent %s count: ($id_count/$str_count):\n" .
311 " line: " . $msg->{loaded_line_number} . "\n" .
312 " msgid: " . $msg->{msgid} . "\n" .
313 " msgstr: " . $msg->{msgstr} . "\n", undef;
317 # set our charset in to UTF-8
318 if (!defined $charset_in) {
319 $charset_in = TmplTokenizer::charset_canon 'UTF-8';
320 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
322 # set our charset out to UTF-8
323 if (!defined $charset_out) {
324 $charset_out = TmplTokenizer::charset_canon 'UTF-8';
325 warn "Warning: Charset Out defaulting to $charset_out\n";
327 my $xgettext = './xgettext.pl'; # actual text extractor script
330 if ($action eq 'create') {
331 # updates the list. As the list is empty, every entry will be added
333 warn "Removing empty file $str_file\n";
334 unlink $str_file || die "$str_file: $!\n";
336 die "$str_file: Output file already exists\n" if -f $str_file;
337 my($tmph1, $tmpfile1) = tmpnam();
338 my($tmph2, $tmpfile2) = tmpnam();
339 close $tmph2; # We just want a name
340 # Generate the temporary file that acts as <MODULE>/POTFILES.in
341 for my $input (@in_files) {
342 print $tmph1 "$input\n";
345 warn "I $charset_in O $charset_out";
346 # Generate the specified po file ($str_file)
347 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
348 (defined $charset_in? ('-I', $charset_in): ()),
349 (defined $charset_out? ('-O', $charset_out): ())
351 # Run msgmerge so that the pot file looks like a real pot file
352 # We need to help msgmerge a bit by pre-creating a dummy po file that has
353 # the headers and the "" msgid & msgstr. It will fill in the rest.
355 # Merge the temporary "pot file" with the specified po file ($str_file)
356 # FIXME: msgmerge(1) is a Unix dependency
357 # FIXME: need to check the return value
358 unless (-f $str_file) {
359 local(*INPUT, *OUTPUT);
360 open(INPUT, "<$tmpfile2");
361 open(OUTPUT, ">$str_file");
369 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
371 error_normal "Text extraction failed: $xgettext: $!\n", undef;
372 error_additional "Will not run msgmerge\n", undef;
374 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
375 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
377 } elsif ($action eq 'update') {
378 my($tmph1, $tmpfile1) = tmpnam();
379 my($tmph2, $tmpfile2) = tmpnam();
380 close $tmph2; # We just want a name
381 # Generate the temporary file that acts as <MODULE>/POTFILES.in
382 for my $input (@in_files) {
383 print $tmph1 "$input\n";
386 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
387 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
389 (defined $charset_in? ('-I', $charset_in): ()),
390 (defined $charset_out? ('-O', $charset_out): ()));
392 # Merge the temporary "pot file" with the specified po file ($str_file)
393 # FIXME: msgmerge(1) is a Unix dependency
394 # FIXME: need to check the return value
396 my ($tmph3, $tmpfile3) = tmpnam();
397 $st = system("msgcat $str_file $tmpfile2 > $tmpfile3");
398 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile3 -o - | msgattrib --no-obsolete -o $str_file")
401 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
404 error_normal "Text extraction failed: $xgettext: $!\n", undef;
405 error_additional "Will not run msgmerge\n", undef;
407 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
408 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
410 } elsif ($action eq 'install') {
411 if(!defined($out_dir)) {
412 usage_error("You must specify an output directory when using the install method.");
415 if ( scalar @in_dirs > 1 ) {
416 usage_error("You must specify only one input directory when using the install method.");
419 my $in_dir = shift @in_dirs;
421 if ($in_dir eq $out_dir) {
422 warn "You must specify a different input and output directory.\n";
426 # Make sure the output directory exists
427 # (It will auto-create it, but for compatibility we should not)
428 -d $out_dir || die "$out_dir: The directory does not exist\n";
430 # Try to open the file, because Locale::PO doesn't check :-/
431 open(INPUT, "<$str_file") || die "$str_file: $!\n";
434 # creates the new tmpl file using the new translation
435 for my $input (@in_files) {
436 die "Assertion failed"
437 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
439 my $target = $out_dir . substr($input, length($in_dir));
440 my $targetdir = $` if $target =~ /[^\/]+$/s;
442 if (!defined $type || $input =~ /\.(?:$type)$/) {
443 my $h = TmplTokenizer->new( $input );
444 $h->set_allow_cformat( 1 );
445 VerboseWarnings::set_input_file_name $input;
446 mkdir_recursive($targetdir) unless -d $targetdir;
447 print STDERR "Creating $target...\n" unless $quiet;
448 open( OUTPUT, ">$target" ) || die "$target: $!\n";
449 text_replace( $h, *OUTPUT );
452 # just copying the file
453 mkdir_recursive($targetdir) unless -d $targetdir;
454 system("cp -f $input $target");
455 print STDERR "Copying $input...\n" unless $quiet;
460 usage_error('Unknown action specified.');
464 printf "The %s seems to be successful.\n", $action unless $quiet;
466 printf "%s FAILED.\n", "\u$action" unless $quiet;
470 ###############################################################################
474 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
478 This is an alternative version of the tmpl_process.pl script,
479 using standard gettext-style PO files. While there still might
480 be changes made to the way it extracts strings, at this moment
481 it should be stable enough for general use; it is already being
482 used for the Chinese and Polish translations.
484 Currently, the create, update, and install actions have all been
485 reimplemented and seem to work.
493 Translation files in standard Uniforum PO format.
494 All standard tools including all gettext tools,
495 plus PO file editors like kbabel(1) etc.
500 Minor changes in whitespace in source templates
501 do not generally require strings to be re-translated.
505 Able to handle <TMPL_VAR> variables in the templates;
506 <TMPL_VAR> variables are usually extracted in proper context,
507 represented by a short %s placeholder.
511 Able to handle text input and radio button INPUT elements
512 in the templates; these INPUT elements are also usually
513 extracted in proper context,
514 represented by a short %S or %p placeholder.
518 Automatic comments in the generated PO files to provide
519 even more context (line numbers, and the names and types
524 The %I<n>$s (or %I<n>$p, etc.) notation can be used
525 for change the ordering of the variables,
526 if such a reordering is required for correct translation.
530 If a particular <TMPL_VAR> should not appear in the
531 translation, it can be suppressed with the %0.0s notation.
535 Using the PO format also means translators can add their
536 own comments in the translation files, if necessary.
540 Create, update, and install actions are all based on the
541 same scanner module. This ensures that update and install
542 have the same idea of what is a translatable string;
543 attribute names in tags, for example, will not be
544 accidentally translated.
550 Anchors are represented by an <AI<n>> notation.
551 The meaning of this non-standard notation might not be obvious.
553 The create action calls xgettext.pl to do the actual work;
554 the update action calls xgettext.pl, msgmerge(1) and msgattrib(1)
555 to do the actual work.
559 xgettext.pl must be present in the current directory; both
560 msgmerge(1) and msgattrib(1) must also be present in the search path.
561 The script currently does not check carefully whether these
562 dependent commands are present.
564 Locale::PO(3) has a lot of bugs. It can neither parse nor
565 generate GNU PO files properly; a couple of workarounds have
566 been written in TmplTokenizer and more is likely to be needed
567 (e.g., to get rid of the "Strange line" warning for #~).
569 This script may not work in Windows.
571 There are probably some other bugs too, since this has not been
582 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms