2 # This file is part of Koha
3 # Parts copyright 2003-2004 Paul Poulain
4 # Parts copyright 2003-2004 Jerome Vizcaino
5 # Parts copyright 2004 Ambrose Li
9 tmpl_process3.pl - Experimental version of tmpl_process.pl
10 using gettext-compatible translation files
17 use File::Temp qw( :POSIX );
19 use VerboseWarnings qw( error_normal warn_normal );
21 ###############################################################################
23 use vars qw( @in_files $in_dir $str_file $out_dir );
24 use vars qw( @excludes $exclude_regex );
25 use vars qw( $recursive_p );
26 use vars qw( $pedantic_p );
28 use vars qw( $type ); # file extension (DOS form without the dot) to match
29 use vars qw( $charset_in $charset_out );
31 ###############################################################################
33 sub find_translation ($) {
37 print STDERR "DEBUG: before: ($key)\n";
38 $key = TmplTokenizer::string_canon($key);
39 $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
40 $key = TmplTokenizer::quote_po($key);
41 print STDERR "DEBUG: after: ($key)\n";
43 return defined $href->{$key}
44 && !$href->{$key}->fuzzy
45 && length Locale::PO->dequote($href->{$key}->msgstr)?
46 Locale::PO->dequote($href->{$key}->msgstr): $s;
49 sub text_replace_tag ($$) {
52 # value [tag=input], meta
53 my $tag = lc($1) if $t =~ /^<(\S+)/s;
55 for my $a ('alt', 'content', 'title', 'value') {
57 next if $a eq 'content' && $tag ne 'meta';
58 next if $a eq 'value' && ($tag ne 'input'
59 || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
60 my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
61 my($pre, $trimmed, $post) = TmplTokenizer::trim $val;
63 my $s = $pre . find_translation($trimmed) . $post;
64 if ($attr->{$a}->[1] ne $s) { #FIXME
65 $attr->{$a}->[1] = $s; # FIXME
66 $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
75 sprintf(' %s=%s', $_, $attr->{$_}->[2]) #FIXME
77 $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME
86 sub text_replace (**) {
89 my $s = TmplTokenizer::next_token $h;
90 last unless defined $s;
91 my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
92 if ($kind eq TmplTokenType::TEXT) {
93 my($pre, $trimmed, $post) = TmplTokenizer::trim $t;
94 print $output $pre, find_translation($trimmed), $post;
95 } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) {
96 my $fmt = find_translation($s->form);
97 print $output TmplTokenizer::parametrize($fmt, [ map {
98 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
99 $kind == TmplTokenType::TAG && %$attr?
100 text_replace_tag($t, $attr): $t } $s->parameters ], [ $s->anchors ]);
101 } elsif ($kind eq TmplTokenType::TAG && %$attr) {
102 print $output text_replace_tag($t, $attr);
103 } elsif (defined $t) {
110 my($dir, $type) = @_;
112 if (opendir(DIR, $dir)) {
113 my @dirent = readdir DIR; # because DIR is shared when recursing
115 for my $dirent (@dirent) {
116 my $path = "$dir/$dirent";
117 if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS'
118 || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) {
121 push @it, $path if !defined $type || $dirent =~ /\.(?:$type)$/;
122 } elsif (-d $path && $recursive_p) {
123 push @it, listfiles($path, $type);
127 warn_normal "$dir: $!", undef;
132 ###############################################################################
134 sub usage_error (;$) {
135 for my $msg (split(/\n/, $_[0])) {
136 print STDERR "$msg\n";
138 print STDERR "Try `$0 --help' for more information.\n";
142 ###############################################################################
145 'input|i=s' => \@in_files,
146 'outputdir|o=s' => \$out_dir,
147 'recursive|r' => \$recursive_p,
148 'str-file|s=s' => \$str_file,
149 'exclude|x=s' => \@excludes,
150 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 },
153 VerboseWarnings::set_application_name $0;
154 VerboseWarnings::set_pedantic_mode $pedantic_p;
156 # try to make sure .po files are backed up (see BUGS)
157 $ENV{VERSION_CONTROL} = 't';
159 # keep the buggy Locale::PO quiet if it says stupid things
160 $SIG{__WARN__} = sub {
162 print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
165 my $action = shift or usage_error('You must specify an ACTION.');
166 usage_error('You must at least specify input and string list filenames.')
167 if !@in_files || !defined $str_file;
169 # Type match defaults to *.tmpl plus *.inc if not specified
170 $type = "tmpl|inc" if !defined($type);
172 # Check the inputs for being files or directories
173 for my $input (@in_files) {
174 usage_error("$input: Input must be a file or directory.\n"
175 . "(Symbolic links are not supported at the moment)")
176 unless -d $input || -f $input;;
179 # Generates the global exclude regular expression
180 $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes;
182 # Generate the list of input files if a directory is specified
183 if (-d $in_files[0]) {
184 die "If you specify a directory as input, you must specify only it.\n"
187 # input is a directory, generates list of files to process
188 $in_dir = $in_files[0];
189 $in_dir =~ s/\/$//; # strips the trailing / if any
190 @in_files = listfiles($in_dir, $type);
192 for my $input (@in_files) {
193 die "You cannot specify input files and directories at the same time.\n"
198 # restores the string list from file
199 $href = Locale::PO->load_file_ashash($str_file);
201 # guess the charsets. HTML::Templates defaults to iso-8859-1
203 $charset_out = TmplTokenizer::charset_canon $2
204 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
205 for my $msgid (keys %$href) {
206 if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
207 my $candidate = TmplTokenizer::charset_canon $2;
208 die "Conflicting charsets in msgid: $charset_in vs $candidate\n"
209 if defined $charset_in && $charset_in ne $candidate;
214 if (!defined $charset_in) {
215 $charset_in = TmplTokenizer::charset_canon 'iso8859-1';
216 warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
219 if ($action eq 'create') {
220 # updates the list. As the list is empty, every entry will be added
221 die "$str_file: Output file already exists" if -f $str_file;
222 my($tmph, $tmpfile) = tmpnam();
223 # Generate the temporary file that acts as <MODULE>/POTFILES.in
224 for my $input (@in_files) {
225 print $tmph "$input\n";
228 # Generate the specified po file ($str_file)
229 system ('xgettext.pl', '-s', '-f', $tmpfile, '-o', $str_file);
230 unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
232 } elsif ($action eq 'update') {
233 my($tmph1, $tmpfile1) = tmpnam();
234 my($tmph2, $tmpfile2) = tmpnam();
235 close $tmph2; # We just want a name
236 # Generate the temporary file that acts as <MODULE>/POTFILES.in
237 for my $input (@in_files) {
238 print $tmph1 "$input\n";
241 # Generate the temporary file that acts as <MODULE>/<LANG>.pot
242 system('./xgettext.pl', '-s', '-f', $tmpfile1, '-o', $tmpfile2,
243 (defined $charset_in? ('-I', $charset_in): ()),
244 (defined $charset_out? ('-O', $charset_out): ()));
245 # Merge the temporary "pot file" with the specified po file ($str_file)
246 # FIXME: msgmerge(1) is a Unix dependency
247 # FIXME: need to check the return value
248 system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
249 unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
250 unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
252 } elsif ($action eq 'install') {
253 if(!defined($out_dir)) {
254 usage_error("You must specify an output directory when using the install method.");
257 if ($in_dir eq $out_dir) {
258 warn "You must specify a different input and output directory.\n";
262 # Make sure the output directory exists
263 # (It will auto-create it, but for compatibility we should not)
264 -d $out_dir || die "$out_dir: The directory does not exist\n";
266 # Try to open the file, because Locale::PO doesn't check :-/
267 open(INPUT, "<$str_file") || die "$str_file: $!\n";
270 # creates the new tmpl file using the new translation
271 for my $input (@in_files) {
272 die "Assertion failed"
273 unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/";
275 my $h = TmplTokenizer->new( $input );
276 $h->set_allow_cformat( 1 );
277 VerboseWarnings::set_input_file_name $input;
279 my $target = $out_dir . substr($input, length($in_dir));
280 my $targetdir = $` if $target =~ /[^\/]+$/s;
281 if (!-d $targetdir) {
282 print STDERR "Making directory $targetdir...";
283 # creates with rwxrwxr-x permissions
284 mkdir($targetdir, 0775) || warn_normal "$targetdir: $!", undef;
286 print STDERR "Creating $target...\n";
287 open( OUTPUT, ">$target" ) || die "$target: $!\n";
288 text_replace( $h, *OUTPUT );
293 usage_error('Unknown action specified.');
297 ###############################################################################
301 ./tmpl_process3.pl [ I<tmpl_process.pl options> ]
305 This is an experimental version of the tmpl_process.pl script,
306 using standard gettext-style PO files. Note that the behaviour
307 of this script should still be considered unstable.
309 Currently, the create, update, and install actions have all been
310 reimplemented and seem to work.
312 The create action calls xgettext.pl to do the actual work;
313 the update action calls xgettext.pl and msgmerge(1) to do the
316 The script can detect <TMPL_VAR> directives embedded inside what
317 appears to be a full sentence (this actual work being done by
318 TmplTokenizer(3)); these larger patterns appear in the translation
319 file as c-format strings with %s.
321 Whitespace in extracted strings are folded to single blanks, in
322 order to prevent new strings from appearing when minor changes in
323 the original templates occur, and to prevent overly difficult to
324 read strings in the PO file.
328 The --help option has not been implemented yet.
330 xgettext.pl must be present in the current directory; the
331 msgmerge(1) command must also be present in the search path.
332 The script currently does not check carefully whether these
333 dependent commands are present.
335 If xgettext.pl is interrupted by the user, a corrupted po file
336 will result. This is very seriously wrong.
338 Locale::PO(3) has a lot of bugs. It can neither parse nor
339 generate GNU PO files properly; a couple of workarounds have
340 been written in TmplTokenizer and more is likely to be needed
341 (e.g., to get rid of the "Strange line" warning for #~).
343 There are probably some other bugs too, since this has not been