2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
9 tmpl_process3.pl - Experimental version of tmpl_process.pl
10 using gettext-compatible translation files
17 use File::Temp qw( :POSIX );
19 use VerboseWarnings qw( :warn :die );
21 ###############################################################################
23 use vars qw( @in_files $in_dir $str_file $out_dir );
24 use vars qw( @excludes $exclude_regex );
25 use vars qw( $recursive_p );
26 use vars qw( $pedantic_p );
28 use vars qw( $type ); # file extension (DOS form without the dot) to match
29 use vars qw( $charset_in $charset_out );
31 ###############################################################################
33 sub find_translation ($) {
37 $key = TmplTokenizer::string_canon($key);
38 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
39 $key = TmplTokenizer::quote_po($key);
41 return defined $href->{$key}
42 && !$href->{$key}->fuzzy
43 && length Locale::PO->dequote($href->{$key}->msgstr)?
44 Locale::PO->dequote($href->{$key}->msgstr): $s;
47 sub text_replace_tag ($$) {
50 # value [tag=input], meta
51 my $tag = lc($1) if $t =~ /^<(\S+)/s;
53 for my $a ('alt', 'content', 'title', 'value') {
55 next if $a eq 'content' && $tag ne 'meta';
56 next if $a eq 'value' && ($tag ne 'input'
57 || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
58 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
60 my $s = find_translation($val);
61 if ($attr->{$a}->[1] ne $s) { #FIXME
62 $attr->{$a}->[1] = $s; # FIXME
63 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
72 sprintf(' %s=%s', $_, $attr->{$_}->[2]) #FIXME
74 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
83 sub text_replace (**) {
86 my $s = TmplTokenizer::next_token $h;
87 last unless defined $s;
88 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
89 if ($kind eq TmplTokenType::TEXT) {
90 print $output find_translation($t);
91 } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) {
92 my $fmt = find_translation($s->form);
93 print $output TmplTokenizer::parametrize($fmt, [ map {
94 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
95 $kind == TmplTokenType::TAG && %$attr?
96 text_replace_tag($t, $attr): $t } $s->parameters ], [ $s->anchors ]);
97 } elsif ($kind eq TmplTokenType::TAG && %$attr) {
98 print $output text_replace_tag($t, $attr);
99 } elsif (defined $t) {
106 my($dir, $type) = @_;
108 if (opendir(DIR, $dir)) {
109 my @dirent = readdir DIR; # because DIR is shared when recursing
111 for my $dirent (@dirent) {
112 my $path = "$dir/$dirent";
113 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
114 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
117 push @it, $path if !defined $type || $dirent =~ /\.(?:$type)$/;
118 } elsif (-d $path && $recursive_p) {
119 push @it, listfiles($path, $type);
123 warn_normal "$dir: $!", undef;
128 ###############################################################################
132 my $h = $exitcode? *STDERR: *STDOUT;
134 Usage: $0 create [OPTION]
135 or: $0 update [OPTION]
136 or: $0 install [OPTION]
138 Create or update PO files from templates, or install translated templates.
140 -i, --input=SOURCE Get or update strings from SOURCE file.
141 SOURCE is a directory if -r is also specified.
142 -o, --outputdir=DIRECTORY Install translation(s) to specified DIRECTORY
143 --pedantic-warnings Issue warnings even for detected problems
144 which are likely to be harmless
145 -r, --recursive SOURCE in the -i option is a directory
146 -s, --str-file=FILE Specify FILE as the translation (po) file
147 for input (install) or output (create, update)
148 -x, --exclude=REGEXP Exclude files matching the given REGEXP
149 --help Display this help and exit
151 The -o option is ignored for the "create" and "update" actions.
152 Try `perldoc $0' for perhaps more information.
157 ###############################################################################
159 sub usage_error (;$) {
160 for my $msg (split(/\n/, $_[0])) {
161 print STDERR "$msg\n";
163 print STDERR "Try `$0 --help' for more information.\n";
167 ###############################################################################
170 'input|i=s' => \@in_files,
171 'outputdir|o=s' => \$out_dir,
172 'recursive|r' => \$recursive_p,
173 'str-file|s=s' => \$str_file,
174 'exclude|x=s' => \@excludes,
175 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
179 VerboseWarnings::set_application_name $0;
180 VerboseWarnings::set_pedantic_mode $pedantic_p;
182 # keep the buggy Locale::PO quiet if it says stupid things
183 $SIG{__WARN__} = sub {
185 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
188 my $action = shift or usage_error('You must specify an ACTION.');
189 usage_error('You must at least specify input and string list filenames.')
190 if !@in_files || !defined $str_file;
192 # Type match defaults to *.tmpl plus *.inc if not specified
193 $type = "tmpl|inc" if !defined($type);
195 # Check the inputs for being files or directories
196 for my $input (@in_files) {
197 usage_error("$input: Input must be a file or directory.\n"
198 . "(Symbolic links are not supported at the moment)")
199 unless -d $input || -f $input;;
202 # Generates the global exclude regular expression
203 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
205 # Generate the list of input files if a directory is specified
206 if (-d $in_files[0]) {
207 die "If you specify a directory as input, you must specify only it.\n"
210 # input is a directory, generates list of files to process
211 $in_dir = $in_files[0];
212 $in_dir =~ s/\/$//; # strips the trailing / if any
213 @in_files = listfiles($in_dir, $type);
215 for my $input (@in_files) {
216 die "You cannot specify input files and directories at the same time.\n"
221 # restores the string list from file
222 $href = Locale::PO->load_file_ashash($str_file);
224 # guess the charsets. HTML::Templates defaults to iso-8859-1
226 die "$str_file: PO file is corrupted, or not a PO file\n"
227 unless defined $href->{'""'};
228 $charset_out = TmplTokenizer::charset_canon $2
229 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
230 for my $msgid (keys %$href) {
231 if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
232 my $candidate = TmplTokenizer::charset_canon $2;
233 die "Conflicting charsets in msgid: $charset_in vs $candidate\n"
234 if defined $charset_in && $charset_in ne $candidate;
235 $charset_in = $candidate;
239 if (!defined $charset_in) {
240 $charset_in = TmplTokenizer::charset_canon 'iso8859-1';
241 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
244 my $xgettext = './xgettext.pl'; # actual text extractor script
247 if ($action eq 'create') {
248 # updates the list. As the list is empty, every entry will be added
250 warn "Removing empty file $str_file\n";
251 unlink $str_file || die "$str_file: $!\n";
253 die "$str_file: Output file already exists\n" if -f $str_file;
254 my($tmph, $tmpfile) = tmpnam();
255 # Generate the temporary file that acts as <MODULE>/POTFILES.in
256 for my $input (@in_files) {
257 print $tmph "$input\n";
260 # Generate the specified po file ($str_file)
261 $st = system ($xgettext, '-s', '-f', $tmpfile, '-o', $str_file);
262 warn_normal "Text extraction failed: $xgettext: $!\n", undef if $st != 0;
263 # unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
265 } elsif ($action eq 'update') {
266 my($tmph1, $tmpfile1) = tmpnam();
267 my($tmph2, $tmpfile2) = tmpnam();
268 close $tmph2; # We just want a name
269 # Generate the temporary file that acts as <MODULE>/POTFILES.in
270 for my $input (@in_files) {
271 print $tmph1 "$input\n";
274 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
275 $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
277 (defined $charset_in? ('-I', $charset_in): ()),
278 (defined $charset_out? ('-O', $charset_out): ()));
280 # Merge the temporary "pot file" with the specified po file ($str_file)
281 # FIXME: msgmerge(1) is a Unix dependency
282 # FIXME: need to check the return value
283 $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
285 error_normal "Text extraction failed: $xgettext: $!\n", undef;
286 error_additional "Will not run msgmerge\n", undef;
288 # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
289 # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
291 } elsif ($action eq 'install') {
292 if(!defined($out_dir)) {
293 usage_error("You must specify an output directory when using the install method.");
296 if ($in_dir eq $out_dir) {
297 warn "You must specify a different input and output directory.\n";
301 # Make sure the output directory exists
302 # (It will auto-create it, but for compatibility we should not)
303 -d $out_dir || die "$out_dir: The directory does not exist\n";
305 # Try to open the file, because Locale::PO doesn't check :-/
306 open(INPUT, "<$str_file") || die "$str_file: $!\n";
309 # creates the new tmpl file using the new translation
310 for my $input (@in_files) {
311 die "Assertion failed"
312 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
314 my $h = TmplTokenizer->new( $input );
315 $h->set_allow_cformat( 1 );
316 VerboseWarnings::set_input_file_name $input;
318 my $target = $out_dir . substr($input, length($in_dir));
319 my $targetdir = $` if $target =~ /[^\/]+$/s;
320 if (!-d $targetdir) {
321 print STDERR "Making directory $targetdir...";
322 # creates with rwxrwxr-x permissions
323 mkdir($targetdir, 0775) || warn_normal "$targetdir: $!", undef;
325 print STDERR "Creating $target...\n";
326 open( OUTPUT, ">$target" ) || die "$target: $!\n";
327 text_replace( $h, *OUTPUT );
332 usage_error('Unknown action specified.');
336 printf "The %s seems to be successful.\n", $action;
338 printf "%s FAILED.\n", "\u$action";
342 ###############################################################################
346 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
350 This is an experimental version of the tmpl_process.pl script,
351 using standard gettext-style PO files. Note that the behaviour
352 of this script should still be considered unstable.
354 Currently, the create, update, and install actions have all been
355 reimplemented and seem to work.
357 The create action calls xgettext.pl to do the actual work;
358 the update action calls xgettext.pl and msgmerge(1) to do the
361 The script can detect <TMPL_VAR> directives embedded inside what
362 appears to be a full sentence (this actual work being done by
363 TmplTokenizer(3)); these larger patterns appear in the translation
364 file as c-format strings with %s.
366 Whitespace in extracted strings are folded to single blanks, in
367 order to prevent new strings from appearing when minor changes in
368 the original templates occur, and to prevent overly difficult to
369 read strings in the PO file.
373 xgettext.pl must be present in the current directory; the
374 msgmerge(1) command must also be present in the search path.
375 The script currently does not check carefully whether these
376 dependent commands are present.
378 Locale::PO(3) has a lot of bugs. It can neither parse nor
379 generate GNU PO files properly; a couple of workarounds have
380 been written in TmplTokenizer and more is likely to be needed
381 (e.g., to get rid of the "Strange line" warning for #~).
383 There are probably some other bugs too, since this has not been