2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
12 tmpl_process3.pl - Alternative version of tmpl_process.pl
13 using gettext-compatible translation files
18 #use warnings; FIXME - Bug 2505
22 use File::Temp qw( :POSIX );
24 use VerboseWarnings qw( :warn :die );
26 ###############################################################################
28 use vars qw( @in_dirs @filenames @match @nomatch $str_file $out_dir $quiet );
29 use vars qw( @excludes $exclude_regex );
30 use vars qw( $recursive_p );
31 use vars qw( $pedantic_p );
33 use vars qw( $type ); # file extension (DOS form without the dot) to match
34 use vars qw( $charset_in $charset_out );
36 ###############################################################################
38 sub find_translation ($) {
42 $key = TmplTokenizer::string_canon($key);
43 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
44 $key = TmplTokenizer::quote_po($key);
46 if (defined $href->{$key} && !$href->{$key}->fuzzy && length Locale::PO->dequote($href->{$key}->msgstr)){
48 return $1 . Locale::PO->dequote($href->{$key}->msgstr);
51 return Locale::PO->dequote($href->{$key}->msgstr);
59 sub text_replace_tag ($$) {
63 # value [tag=input], meta
64 my $tag = lc($1) if $t =~ /^<(\S+)/s;
66 for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
68 next if $a eq 'label' && $tag ne 'optgroup';
69 next if $a eq 'content' && $tag ne 'meta';
70 next if $a eq 'value' && ($tag ne 'input' || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:checkbox|hidden|radio)$/)); # FIXME
72 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
74 my $s = find_translation($val);
75 if ($attr->{$a}->[1] ne $s) { #FIXME
76 $attr->{$a}->[1] = $s; # FIXME
77 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
85 . join('', map { if ($_ ne '/'){
86 sprintf(' %s="%s"', $_, $attr->{$_}->[1]);
93 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
94 || $a cmp $b # Sort attributes BZ 22236
104 sub text_replace (**) {
105 my($h, $output) = @_;
107 my $s = TmplTokenizer::next_token $h;
108 last unless defined $s;
109 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
110 if ($kind eq C4::TmplTokenType::TEXT) {
111 print $output find_translation($t);
112 } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
113 my $fmt = find_translation($s->form);
114 print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
116 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
117 $kind == C4::TmplTokenType::TAG && %$attr?
118 text_replace_tag($t, $attr): $t });
119 } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
120 print $output text_replace_tag($t, $attr);
121 } elsif ($s->has_js_data) {
122 for my $t (@{$s->js_data}) {
123 # FIXME for this whole block
125 printf $output "%s%s%s", $t->[2], find_translation $t->[3],
128 print $output $t->[1];
131 } elsif (defined $t) {
132 # Quick fix to bug 4472
133 $t = "<!DOCTYPE stylesheet [" if $t =~ /DOCTYPE stylesheet/ ;
140 my($dir, $type, $action) = @_;
141 my $filenames = join ('|', @filenames); # used to update strings from this file
142 my $match = join ('|', @match); # use only this files
143 my $nomatch = join ('|', @nomatch); # do no use this files
145 if (opendir(DIR, $dir)) {
146 my @dirent = readdir DIR; # because DIR is shared when recursing
148 for my $dirent (@dirent) {
149 my $path = "$dir/$dirent";
150 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
151 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
154 my $basename = fileparse( $path );
156 if ( not @filenames or $basename =~ /($filenames)/i )
157 and ( not @match or $basename =~ /($match)/i ) # files to include
158 and ( not @nomatch or $basename !~ /($nomatch)/i ) # files not to include
159 and (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install';
160 } elsif (-d $path && $recursive_p) {
161 push @it, listfiles($path, $type, $action);
165 warn_normal "$dir: $!", undef;
170 ###############################################################################
172 sub mkdir_recursive ($) {
174 local($`, $&, $', $1);
175 $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/;
176 my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir);
177 mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix;
179 print STDERR "Making directory $dir...\n" unless $quiet;
180 # creates with rwxrwxr-x permissions
181 mkdir($dir, 0775) || warn_normal "$dir: $!", undef;
185 ###############################################################################
189 my $h = $exitcode? *STDERR: *STDOUT;
191 Usage: $0 create [OPTION]
192 or: $0 update [OPTION]
193 or: $0 install [OPTION]
195 Create or update PO files from templates, or install translated templates.
197 -i, --input=SOURCE Get or update strings from SOURCE directory(s).
198 On create or update can have multiple values.
199 On install only one value.
200 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
201 --pedantic-warnings Issue warnings even for detected problems
202 which are likely to be harmless
203 -r, --recursive SOURCE in the -i option is a directory
204 -f, --filename=FILE FILE is a specific filename or part of it.
205 If given, only these files will be processed.
206 On update only relevant strings will be updated.
207 -m, --match=FILE FILE is a specific filename or part of it.
208 If given, only these files will be processed.
209 -n, --nomatch=FILE FILE is a specific filename or part of it.
210 If given, these files will not be processed.
211 -s, --str-file=FILE Specify FILE as the translation (po) file
212 for input (install) or output (create, update)
213 -x, --exclude=REGEXP Exclude dirs matching the given REGEXP
214 --help Display this help and exit
215 -q, --quiet no output to screen (except for errors)
217 The -o option is ignored for the "create" and "update" actions.
218 Try `perldoc $0` for perhaps more information.
223 ###############################################################################
225 sub usage_error (;$) {
226 for my $msg (split(/\n/, $_[0])) {
227 print STDERR "$msg\n";
229 print STDERR "Try `$0 --help for more information.\n";
233 ###############################################################################
236 'input|i=s' => \@in_dirs,
237 'filename|f=s' => \@filenames,
238 'match|m=s' => \@match,
239 'nomatch|n=s' => \@nomatch,
240 'outputdir|o=s' => \$out_dir,
241 'recursive|r' => \$recursive_p,
242 'str-file|s=s' => \$str_file,
243 'exclude|x=s' => \@excludes,
244 'quiet|q' => \$quiet,
245 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
249 VerboseWarnings::set_application_name $0;
250 VerboseWarnings::set_pedantic_mode $pedantic_p;
252 # keep the buggy Locale::PO quiet if it says stupid things
253 $SIG{__WARN__} = sub {
255 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
258 my $action = shift or usage_error('You must specify an ACTION.');
259 usage_error('You must at least specify input and string list filenames.')
260 if !@in_dirs || !defined $str_file;
262 # Type match defaults to *.tt plus *.inc if not specified
263 $type = "tt|inc|xsl|xml|def" if !defined($type);
265 # Check the inputs for being directories
266 for my $in_dir ( @in_dirs ) {
267 usage_error("$in_dir: Input must be a directory.\n"
268 . "(Symbolic links are not supported at the moment)")
272 # Generates the global exclude regular expression
273 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
276 # Generate the list of input files if a directory is specified
277 # input is a directory, generates list of files to process
279 for my $fn ( @filenames ) {
280 die "You cannot specify input files and directories at the same time.\n"
283 for my $in_dir ( @in_dirs ) {
284 $in_dir =~ s/\/$//; # strips the trailing / if any
285 @in_files = ( @in_files, listfiles($in_dir, $type, $action));
288 # restores the string list from file
289 $href = Locale::PO->load_file_ashash($str_file);
291 # guess the charsets. HTML::Templates defaults to iso-8859-1
293 die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'};
294 $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
295 $charset_in = $charset_out;
296 # for my $msgid (keys %$href) {
297 # if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
298 # my $candidate = TmplTokenizer::charset_canon $2;
299 # die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n"
300 # if defined $charset_in && $charset_in ne $candidate;
301 # $charset_in = $candidate;
305 # BUG6464: check consistency of PO messages
306 # - count number of '%s' in msgid and msgstr
307 for my $msg ( values %$href ) {
308 my $id_count = split(/%s/, $msg->{msgid}) - 1;
309 my $str_count = split(/%s/, $msg->{msgstr}) - 1;
310 next if $id_count == $str_count ||
311 $msg->{msgstr} eq '""' ||
312 grep { /fuzzy/ } @{$msg->{_flags}};
314 "unconsistent %s count: ($id_count/$str_count):\n" .
315 " line: " . $msg->{loaded_line_number} . "\n" .
316 " msgid: " . $msg->{msgid} . "\n" .
317 " msgstr: " . $msg->{msgstr} . "\n", undef;
321 # set our charset in to UTF-8
322 if (!defined $charset_in) {
323 $charset_in = TmplTokenizer::charset_canon 'UTF-8';
324 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
326 # set our charset out to UTF-8
327 if (!defined $charset_out) {
328 $charset_out = TmplTokenizer::charset_canon 'UTF-8';
329 warn "Warning: Charset Out defaulting to $charset_out\n";
331 my $xgettext = './xgettext.pl'; # actual text extractor script
334 if ($action eq 'create') {
335 # updates the list. As the list is empty, every entry will be added
337 warn "Removing empty file $str_file\n";
338 unlink $str_file || die "$str_file: $!\n";
340 die "$str_file: Output file already exists\n" if -f $str_file;
341 my($tmph1, $tmpfile1) = tmpnam();
342 my($tmph2, $tmpfile2) = tmpnam();
343 close $tmph2; # We just want a name
344 # Generate the temporary file that acts as <MODULE>/POTFILES.in
345 for my $input (@in_files) {
346 print $tmph1 "$input\n";
349 warn "I $charset_in O $charset_out";
350 # Generate the specified po file ($str_file)
351 $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
352 (defined $charset_in? ('-I', $charset_in): ()),
353 (defined $charset_out? ('-O', $charset_out): ())
355 # Run msgmerge so that the pot file looks like a real pot file
356 # We need to help msgmerge a bit by pre-creating a dummy po file that has
357 # the headers and the "" msgid & msgstr. It will fill in the rest.
359 # Merge the temporary "pot file" with the specified po file ($str_file)
360 # FIXME: msgmerge(1) is a Unix dependency
361 # FIXME: need to check the return value
362 unless (-f $str_file) {
363 local(*INPUT, *OUTPUT);
364 open(INPUT, "<$tmpfile2");
365 open(OUTPUT, ">$str_file");
373 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
375 error_normal "Text extraction failed: $xgettext: $!\n", undef;
376 error_additional "Will not run msgmerge\n", undef;
378 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
379 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
381 } elsif ($action eq 'update') {
382 my($tmph1, $tmpfile1) = tmpnam();
383 my($tmph2, $tmpfile2) = tmpnam();
384 close $tmph2; # We just want a name
385 # Generate the temporary file that acts as <MODULE>/POTFILES.in
386 for my $input (@in_files) {
387 print $tmph1 "$input\n";
390 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
391 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
393 (defined $charset_in? ('-I', $charset_in): ()),
394 (defined $charset_out? ('-O', $charset_out): ()));
396 # Merge the temporary "pot file" with the specified po file ($str_file)
397 # FIXME: msgmerge(1) is a Unix dependency
398 # FIXME: need to check the return value
400 my ($tmph3, $tmpfile3) = tmpnam();
401 $st = system("msgcat $str_file $tmpfile2 > $tmpfile3");
402 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile3 -o - | msgattrib --no-obsolete -o $str_file")
405 $st = system("msgmerge ".($quiet?'-q':'')." -s $str_file $tmpfile2 -o - | msgattrib --no-obsolete -o $str_file");
408 error_normal "Text extraction failed: $xgettext: $!\n", undef;
409 error_additional "Will not run msgmerge\n", undef;
411 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
412 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
414 } elsif ($action eq 'install') {
415 if(!defined($out_dir)) {
416 usage_error("You must specify an output directory when using the install method.");
419 if ( scalar @in_dirs > 1 ) {
420 usage_error("You must specify only one input directory when using the install method.");
423 my $in_dir = shift @in_dirs;
425 if ($in_dir eq $out_dir) {
426 warn "You must specify a different input and output directory.\n";
430 # Make sure the output directory exists
431 # (It will auto-create it, but for compatibility we should not)
432 -d $out_dir || die "$out_dir: The directory does not exist\n";
434 # Try to open the file, because Locale::PO doesn't check :-/
435 open(INPUT, "<$str_file") || die "$str_file: $!\n";
438 # creates the new tmpl file using the new translation
439 for my $input (@in_files) {
440 die "Assertion failed"
441 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
443 my $target = $out_dir . substr($input, length($in_dir));
444 my $targetdir = $` if $target =~ /[^\/]+$/s;
446 if (!defined $type || $input =~ /\.(?:$type)$/) {
447 my $h = TmplTokenizer->new( $input );
448 $h->set_allow_cformat( 1 );
449 VerboseWarnings::set_input_file_name $input;
450 mkdir_recursive($targetdir) unless -d $targetdir;
451 print STDERR "Creating $target...\n" unless $quiet;
452 open( OUTPUT, ">$target" ) || die "$target: $!\n";
453 text_replace( $h, *OUTPUT );
456 # just copying the file
457 mkdir_recursive($targetdir) unless -d $targetdir;
458 system("cp -f $input $target");
459 print STDERR "Copying $input...\n" unless $quiet;
464 usage_error('Unknown action specified.');
468 printf "The %s seems to be successful.\n", $action unless $quiet;
470 printf "%s FAILED.\n", "\u$action" unless $quiet;
474 ###############################################################################
478 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
482 This is an alternative version of the tmpl_process.pl script,
483 using standard gettext-style PO files. While there still might
484 be changes made to the way it extracts strings, at this moment
485 it should be stable enough for general use; it is already being
486 used for the Chinese and Polish translations.
488 Currently, the create, update, and install actions have all been
489 reimplemented and seem to work.
497 Translation files in standard Uniforum PO format.
498 All standard tools including all gettext tools,
499 plus PO file editors like kbabel(1) etc.
504 Minor changes in whitespace in source templates
505 do not generally require strings to be re-translated.
509 Able to handle <TMPL_VAR> variables in the templates;
510 <TMPL_VAR> variables are usually extracted in proper context,
511 represented by a short %s placeholder.
515 Able to handle text input and radio button INPUT elements
516 in the templates; these INPUT elements are also usually
517 extracted in proper context,
518 represented by a short %S or %p placeholder.
522 Automatic comments in the generated PO files to provide
523 even more context (line numbers, and the names and types
528 The %I<n>$s (or %I<n>$p, etc.) notation can be used
529 for change the ordering of the variables,
530 if such a reordering is required for correct translation.
534 If a particular <TMPL_VAR> should not appear in the
535 translation, it can be suppressed with the %0.0s notation.
539 Using the PO format also means translators can add their
540 own comments in the translation files, if necessary.
544 Create, update, and install actions are all based on the
545 same scanner module. This ensures that update and install
546 have the same idea of what is a translatable string;
547 attribute names in tags, for example, will not be
548 accidentally translated.
554 Anchors are represented by an <AI<n>> notation.
555 The meaning of this non-standard notation might not be obvious.
557 The create action calls xgettext.pl to do the actual work;
558 the update action calls xgettext.pl, msgmerge(1) and msgattrib(1)
559 to do the actual work.
563 xgettext.pl must be present in the current directory; both
564 msgmerge(1) and msgattrib(1) must also be present in the search path.
565 The script currently does not check carefully whether these
566 dependent commands are present.
568 Locale::PO(3) has a lot of bugs. It can neither parse nor
569 generate GNU PO files properly; a couple of workarounds have
570 been written in TmplTokenizer and more is likely to be needed
571 (e.g., to get rid of the "Strange line" warning for #~).
573 This script may not work in Windows.
575 There are probably some other bugs too, since this has not been
586 http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms