2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
9 tmpl_process3.pl - Alternative version of tmpl_process.pl
10 using gettext-compatible translation files
15 #use warnings; FIXME - Bug 2505
19 use File::Temp qw( :POSIX );
21 use VerboseWarnings qw( :warn :die );
23 ###############################################################################
25 use vars qw( @in_dirs @filenames $str_file $out_dir $quiet );
26 use vars qw( @excludes $exclude_regex );
27 use vars qw( $recursive_p );
28 use vars qw( $pedantic_p );
30 use vars qw( $type ); # file extension (DOS form without the dot) to match
31 use vars qw( $charset_in $charset_out );
33 ###############################################################################
35 sub find_translation ($) {
39 $key = TmplTokenizer::string_canon($key);
40 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
41 $key = TmplTokenizer::quote_po($key);
43 if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){
45 return $1 . Locale::PO->dequote($href->{$key}->msgstr);
48 return Locale::PO->dequote($href->{$key}->msgstr);
56 sub text_replace_tag ($$) {
60 # value [tag=input], meta
61 my $tag = lc($1) if $t =~ /^<(\S+)/s;
63 for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
65 next if $a eq 'label' && $tag ne 'optgroup';
66 next if $a eq 'content' && $tag ne 'meta';
67 next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio|text)$/)); # FIXME
69 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
71 my $s = find_translation($val);
72 if ($attr->{$a}->[1] ne $s) { #FIXME
73 $attr->{$a}->[1] = $s; # FIXME
74 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
82 . join('', map { if ($_ ne '/'){
83 sprintf(' %s="%s"', $_, $attr->{$_}->[1]);
90 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
100 sub text_replace (**) {
101 my($h, $output) = @_;
103 my $s = TmplTokenizer::next_token $h;
104 last unless defined $s;
105 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
106 if ($kind eq C4::TmplTokenType::TEXT) {
107 print $output find_translation($t);
108 } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
109 my $fmt = find_translation($s->form);
110 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
112 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
113 $kind == C4::TmplTokenType::TAG && %$attr?
114 text_replace_tag($t, $attr): $t });
115 } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
116 print $output text_replace_tag($t, $attr);
117 } elsif ($s->has_js_data) {
118 for my $t (@{$s->js_data}) {
119 # FIXME for this whole block
121 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
124 print $output $t->[1];
127 } elsif (defined $t) {
128 # Quick fix to bug 4472
129 $t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/ ;
136 my($dir, $type, $action, $filenames) = @_;
138 if (opendir(DIR, $dir)) {
139 my @dirent = readdir DIR; # because DIR is shared when recursing
141 for my $dirent (@dirent) {
142 my $path = "$dir/$dirent";
143 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
144 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
147 my $basename = basename $path;
149 if ( not @$filenames or ( grep { $path =~ /$_/ } @$filenames ) )
150 and (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
151 } elsif (-d $path && $recursive_p) {
152 push @it, listfiles($path, $type, $action, $filenames);
156 warn_normal "$dir: $!", undef;
161 ###############################################################################
163 sub mkdir_recursive ($) {
165 local($`, $&, $', $1);
166 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
167 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
168 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
170 print STDERR "Making directory $dir..." unless $quiet;
171 # creates with rwxrwxr-x permissions
172 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
176 ###############################################################################
180 my $h = $exitcode? *STDERR: *STDOUT;
182 Usage: $0 create [OPTION]
183 or: $0 update [OPTION]
184 or: $0 install [OPTION]
186 Create or update PO files from templates, or install translated templates.
188 -i, --input=SOURCE Get or update strings from SOURCE directory(s).
189 On create or update can have multiple values.
190 On install only one value.
191 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
192 --pedantic-warnings Issue warnings even for detected problems
193 which are likely to be harmless
194 -r, --recursive SOURCE in the -i option is a directory
195 -f, --filename=FILE FILE is a specific filaneme.
196 If given, only these files will be processed.
197 -s, --str-file=FILE Specify FILE as the translation (po) file
198 for input (install) or output (create, update)
199 -x, --exclude=REGEXP Exclude files matching the given REGEXP
200 --help Display this help and exit
201 -q, --quiet no output to screen (except for errors)
203 The -o option is ignored for the "create" and "update" actions.
204 Try `perldoc $0` for perhaps more information.
209 ###############################################################################
211 sub usage_error (;$) {
212 for my $msg (split(/\n/, $_[0])) {
213 print STDERR "$msg\n";
215 print STDERR "Try `$0 --help for more information.\n";
219 ###############################################################################
222 'input|i=s' => \@in_dirs,
223 'filename|f=s' => \@filenames,
224 'outputdir|o=s' => \$out_dir,
225 'recursive|r' => \$recursive_p,
226 'str-file|s=s' => \$str_file,
227 'exclude|x=s' => \@excludes,
228 'quiet|q' => \$quiet,
229 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
233 VerboseWarnings::set_application_name $0;
234 VerboseWarnings::set_pedantic_mode $pedantic_p;
236 # keep the buggy Locale::PO quiet if it says stupid things
237 $SIG{__WARN__} = sub {
239 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
242 my $action = shift or usage_error('You must specify an ACTION.');
243 usage_error('You must at least specify input and string list filenames.')
244 if !@in_dirs || !defined $str_file;
246 # Type match defaults to *.tt plus *.inc if not specified
247 $type = "tt|inc|xsl|xml|def" if !defined($type);
249 # Check the inputs for being directories
250 for my $in_dir ( @in_dirs ) {
251 usage_error("$in_dir: Input must be a directory.\n"
252 . "(Symbolic links are not supported at the moment)")
256 # Generates the global exclude regular expression
257 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
260 # Generate the list of input files if a directory is specified
261 # input is a directory, generates list of files to process
263 for my $fn ( @filenames ) {
264 die "You cannot specify input files and directories at the same time.\n"
267 for my $in_dir ( @in_dirs ) {
268 $in_dir =~ s/\/$//; # strips the trailing / if any
269 @in_files = ( @in_files, listfiles($in_dir, $type, $action, \@filenames) );
272 # restores the string list from file
273 $href = Locale::PO->load_file_ashash($str_file);
275 # guess the charsets. HTML::Templates defaults to iso-8859-1
277 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
278 $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
279 $charset_in = $charset_out;
280 # for my $msgid (keys %$href) {
281 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
282 # my $candidate = TmplTokenizer::charset_canon $2;
283 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
284 # if defined $charset_in && $charset_in ne $candidate;
285 # $charset_in = $candidate;
289 # BUG6464: check consistency of PO messages
290 # - count number of '%s' in msgid and msgstr
291 for my $msg ( values %$href ) {
292 my $id_count = split(/%s/, $msg->{msgid}) - 1;
293 my $str_count = split(/%s/, $msg->{msgstr}) - 1;
294 next if $id_count == $str_count ||
295 $msg->{msgstr} eq '""' ||
296 grep { /fuzzy/ } @{$msg->{_flags}};
298 "unconsistent %s count: ($id_count/$str_count):\n" .
299 " line: " . $msg->{loaded_line_number} . "\n" .
300 " msgid: " . $msg->{msgid} . "\n" .
301 " msgstr: " . $msg->{msgstr} . "\n", undef;
305 # set our charset in to UTF-8
306 if (!defined $charset_in) {
307 $charset_in = TmplTokenizer::charset_canon 'UTF-8';
308 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
310 # set our charset out to UTF-8
311 if (!defined $charset_out) {
312 $charset_out = TmplTokenizer::charset_canon 'UTF-8';
313 warn "Warning: Charset Out defaulting to $charset_out\n";
315 my $xgettext = './xgettext.pl'; # actual text extractor script
318 if ($action eq 'create') {
319 # updates the list. As the list is empty, every entry will be added
321 warn "Removing empty file $str_file\n";
322 unlink $str_file || die "$str_file: $!\n";
324 die "$str_file: Output file already exists\n" if -f $str_file;
325 my($tmph1, $tmpfile1) = tmpnam();
326 my($tmph2, $tmpfile2) = tmpnam();
327 close $tmph2; # We just want a name
328 # Generate the temporary file that acts as <MODULE>/POTFILES.in
329 for my $input (@in_files) {
330 print $tmph1 "$input\n";
333 warn "I $charset_in O $charset_out";
334 # Generate the specified po file ($str_file)
335 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
336 (defined $charset_in? ('-I', $charset_in): ()),
337 (defined $charset_out? ('-O', $charset_out): ())
339 # Run msgmerge so that the pot file looks like a real pot file
340 # We need to help msgmerge a bit by pre-creating a dummy po file that has
341 # the headers and the "" msgid & msgstr. It will fill in the rest.
343 # Merge the temporary "pot file" with the specified po file ($str_file)
344 # FIXME: msgmerge(1) is a Unix dependency
345 # FIXME: need to check the return value
346 unless (-f $str_file) {
347 local(*INPUT, *OUTPUT);
348 open(INPUT, "<$tmpfile2");
349 open(OUTPUT, ">$str_file");
357 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
359 error_normal "Text extraction failed: $xgettext: $!\n", undef;
360 error_additional "Will not run msgmerge\n", undef;
362 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
363 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
365 } elsif ($action eq 'update') {
366 my($tmph1, $tmpfile1) = tmpnam();
367 my($tmph2, $tmpfile2) = tmpnam();
368 close $tmph2; # We just want a name
369 # Generate the temporary file that acts as <MODULE>/POTFILES.in
370 for my $input (@in_files) {
371 print $tmph1 "$input\n";
374 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
375 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
377 (defined $charset_in? ('-I', $charset_in): ()),
378 (defined $charset_out? ('-O', $charset_out): ()));
380 # Merge the temporary "pot file" with the specified po file ($str_file)
381 # FIXME: msgmerge(1) is a Unix dependency
382 # FIXME: need to check the return value
384 my ($tmph3, $tmpfile3) = tmpnam();
385 $st = system("msgcat $str_file $tmpfile2 > $tmpfile3");
386 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile3 -o - | msgattrib --no-obsolete -o $str_file")
389 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
392 error_normal "Text extraction failed: $xgettext: $!\n", undef;
393 error_additional "Will not run msgmerge\n", undef;
395 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
396 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
398 } elsif ($action eq 'install') {
399 if(!defined($out_dir)) {
400 usage_error("You must specify an output directory when using the install method.");
403 if ( scalar @in_dirs > 1 ) {
404 usage_error("You must specify only one input directory when using the install method.");
407 my $in_dir = shift @in_dirs;
409 if ($in_dir eq $out_dir) {
410 warn "You must specify a different input and output directory.\n";
414 # Make sure the output directory exists
415 # (It will auto-create it, but for compatibility we should not)
416 -d $out_dir || die "$out_dir: The directory does not exist\n";
418 # Try to open the file, because Locale::PO doesn't check :-/
419 open(INPUT, "<$str_file") || die "$str_file: $!\n";
422 # creates the new tmpl file using the new translation
423 for my $input (@in_files) {
424 die "Assertion failed"
425 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
427 my $target = $out_dir . substr($input, length($in_dir));
428 my $targetdir = $` if $target =~ /[^\/]+$/s;
430 if (!defined $type || $input =~ /\.(?:$type)$/) {
431 my $h = TmplTokenizer->new( $input );
432 $h->set_allow_cformat( 1 );
433 VerboseWarnings::set_input_file_name $input;
434 mkdir_recursive($targetdir) unless -d $targetdir;
435 print STDERR "Creating $target...\n" unless $quiet;
436 open( OUTPUT, ">$target" ) || die "$target: $!\n";
437 text_replace( $h, *OUTPUT );
440 # just copying the file
441 mkdir_recursive($targetdir) unless -d $targetdir;
442 system("cp -f $input $target");
443 print STDERR "Copying $input...\n" unless $quiet;
448 usage_error('Unknown action specified.');
452 printf "The %s seems to be successful.\n", $action unless $quiet;
454 printf "%s FAILED.\n", "\u$action" unless $quiet;
458 ###############################################################################
462 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
466 This is an alternative version of the tmpl_process.pl script,
467 using standard gettext-style PO files. While there still might
468 be changes made to the way it extracts strings, at this moment
469 it should be stable enough for general use; it is already being
470 used for the Chinese and Polish translations.
472 Currently, the create, update, and install actions have all been
473 reimplemented and seem to work.
481 Translation files in standard Uniforum PO format.
482 All standard tools including all gettext tools,
483 plus PO file editors like kbabel(1) etc.
488 Minor changes in whitespace in source templates
489 do not generally require strings to be re-translated.
493 Able to handle <TMPL_VAR> variables in the templates;
494 <TMPL_VAR> variables are usually extracted in proper context,
495 represented by a short %s placeholder.
499 Able to handle text input and radio button INPUT elements
500 in the templates; these INPUT elements are also usually
501 extracted in proper context,
502 represented by a short %S or %p placeholder.
506 Automatic comments in the generated PO files to provide
507 even more context (line numbers, and the names and types
512 The %I<n>$s (or %I<n>$p, etc.) notation can be used
513 for change the ordering of the variables,
514 if such a reordering is required for correct translation.
518 If a particular <TMPL_VAR> should not appear in the
519 translation, it can be suppressed with the %0.0s notation.
523 Using the PO format also means translators can add their
524 own comments in the translation files, if necessary.
528 Create, update, and install actions are all based on the
529 same scanner module. This ensures that update and install
530 have the same idea of what is a translatable string;
531 attribute names in tags, for example, will not be
532 accidentally translated.
538 Anchors are represented by an <AI<n>> notation.
539 The meaning of this non-standard notation might not be obvious.
541 The create action calls xgettext.pl to do the actual work;
542 the update action calls xgettext.pl, msgmerge(1) and msgattrib(1)
543 to do the actual work.
547 xgettext.pl must be present in the current directory; both
548 msgmerge(1) and msgattrib(1) must also be present in the search path.
549 The script currently does not check carefully whether these
550 dependent commands are present.
552 Locale::PO(3) has a lot of bugs. It can neither parse nor
553 generate GNU PO files properly; a couple of workarounds have
554 been written in TmplTokenizer and more is likely to be needed
555 (e.g., to get rid of the "Strange line" warning for #~).
557 This script may not work in Windows.
559 There are probably some other bugs too, since this has not been
570 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms