Browse Source

Bug 21481: Remove obsolete files from translation toolkit

Signed-off-by: Owen Leonard <oleonard@myacpl.org>
Signed-off-by: Julian Maurice <julian.maurice@biblibre.com>

Conflicts:
	misc/translator/text-extract.pl
	misc/translator/text-extract2.pl

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
18.11.x
Josef Moravec 6 years ago
committed by Nick Clemens
parent
commit
6e499dd567
  1. 7
      misc/translator/Makefile
  2. 39
      misc/translator/text-extract.pl
  3. 170
      misc/translator/text-extract2.pl
  4. 3
      misc/translator/xgettext.pl

7
misc/translator/Makefile

@ -1,7 +0,0 @@
# Build the plain-text translator documentation from its HTML source.
# Neither "all" nor "clean" names a real file, so declare them phony; this
# keeps a stray file called "all" or "clean" from silencing the target.
.PHONY: all clean

all: translator_doc.txt

# Convert an HTML page to 80-column plain text:
#   1. the first sed wraps the content of every <blockquote> in a
#      <table>/<pre> pair before rendering;
#   2. w3m renders the HTML as text, 80 columns wide, with LANG=C;
#   3. the second sed strips trailing spaces ("$$" is make's escape for "$");
#   4. "cat -s" squeezes runs of blank lines into one.
%.txt: %.html
	set -x; sed -e 's/<blockquote>/&<table border=0 width="100%"><tr><td nowrap><pre>/g' -e 's/<\/blockquote>/<\/pre><\/table>&/g' $<|LANG=C w3m -T text/html -cols 80 | sed -e 's/ *$$//g' | cat -s > $@

# -f: do not fail when the generated file does not exist (already clean).
clean:
	rm -f translator_doc.txt

39
misc/translator/text-extract.pl

@ -1,39 +0,0 @@
#!/usr/bin/perl
#use strict;
#use warnings; FIXME - Bug 2505
use FindBin;
use lib $FindBin::Bin;
use HTML::Tree;
use Getopt::Std;
# Read the input file name from -f.
# getopts() is used instead of getopt(): with getopts() the spec "f:" means
# "-f takes an argument", whereas getopt() treats every character of its
# argument (including the colon) as a switch that takes a value.
our $opt_f;
getopts("f:");
die "Usage: $0 -f FILE\n" unless defined $opt_f;

# Parse the whole file into an in-memory HTML tree.
my $tree = HTML::TreeBuilder->new; # empty tree
# parse_file() yields undef when the file cannot be opened; report that
# instead of silently walking an empty tree.
defined $tree->parse_file($opt_f)
    or die "$0: cannot parse '$opt_f': $!\n";
# Recursively print the translatable strings found below an element.
#
# Argument: an element object offering content_list(), tag() and attr().
# Children that are plain (non-reference) scalars are treated as text nodes.
#
# One string is printed per line on the currently selected output handle:
#   - the text of every text node;
#   - true-valued "alt" and "title" attributes of any element;
#   - a true-valued "value" attribute of <input> elements;
#   - a defined, non-empty "content" attribute of <meta> elements.
# Comment, <script> and <style> subtrees are skipped entirely. The element
# passed in is only descended into; its own attributes are not examined.
#
# Returns nothing.
sub give_id {
    my ($element) = @_;

    CHILD:
    foreach my $child ($element->content_list) {
        # Text node: print it; there is nothing to descend into.
        if (!ref $child) {
            print "$child\n";
            next CHILD;
        }

        my $tag = $child->tag();
        next CHILD if $tag eq "~comment" || $tag eq "script" || $tag eq "style";

        for my $name ('alt', 'title') {
            my $value = $child->attr($name);
            print $value."\n" if $value;
        }

        if ($tag eq "input" && $child->attr('value')) {
            print $child->attr('value')."\n";
        }

        if ($tag eq 'meta') {
            # Only emit a line when the meta element really carries content,
            # and end it with a bare newline so the next string printed is
            # not prefixed by a stray space.
            my $content = $child->attr('content');
            print $content."\n" if defined $content && length $content;
        }

        give_id($child);
    }

    return;
}
# Walk the parsed document and print its translatable strings.
give_id($tree);
# Explicitly destroy the tree and store the result of delete() back into
# $tree (presumably clearing the reference; confirm against HTML::Element).
$tree = $tree->delete;

170
misc/translator/text-extract2.pl

@ -1,170 +0,0 @@
#!/usr/bin/perl
# Test filter partially based on Ambrose's hideous subst.pl code
# The idea is that the .tt files are not valid HTML, and as a result
# HTML::Parse would be completely confused by these templates.
# This is just a simple scanner (not a parser) & should give better results.
# This script is meant to be a drop-in replacement of text-extract.pl
# A grander plan: Code could be written to detect template variables and
# construct gettext-c-format-string-like meta-strings (e.g., "Results %s
# through %s of %s records" that will be more likely to be translatable
# to languages where word order is very unlike English word order.
# --> This will be relatively major rework, and requires corresponding
# rework in tmpl_process.pl
use FindBin;
use lib $FindBin::Bin;
use strict;
#use warnings; FIXME - Bug 2505
use Getopt::Long;
use TmplTokenizer;
use VerboseWarnings;
use vars qw( $input );
use vars qw( $debug_dump_only_p );
use vars qw( $pedantic_p );
use vars qw( $allow_cformat_p ); # FOR TESTING PURPOSES ONLY!!
###############################################################################
# Return a copy of the string in which every character outside the range
# \0-\37 is followed by a backspace and a repeat of itself; characters inside
# that range are copied unchanged. For testing only.
# The ($) prototype is kept: callers rely on it to write
# "underline $x, $y" with $y parsed as a separate list element.
sub underline ($) { # for testing only
    my ($text) = @_;
    my $marked = '';
    for my $char (split(//, $text)) {
        if ($char =~ /[\0-\37]/) {
            $marked .= $char;
        }
        else {
            $marked .= "$char\b$char";
        }
    }
    return $marked;
}
# Print a human-readable dump of every token the tokenizer yields.
# For testing only; no strings are extracted.
#
# Argument: the tokenizer handle passed on to TmplTokenizer::next_token().
# For each token the dump shows its type and line number, its text, its
# attributes (TAG tokens), its c-format form and parameters
# (TEXT_PARAMETRIZED tokens) and any JavaScript strings it carries.
sub debug_dump ($) { # for testing only
    my ($tokenizer) = @_;
    print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";

    while (1) {
        my $token = TmplTokenizer::next_token($tokenizer);
        last unless defined $token;

        printf "%s\n", ('-' x 79);
        my ($type, $string, $attributes)
            = ($token->type, $token->string, $token->attributes);
        printf "%s [line %d]:\n", $type->to_string, $token->line_number;
        printf "%4dH%s\n", length($string), underline($string);

        if ($type == TmplTokenType::TAG() && %$attributes) {
            printf "Attributes:\n";
            for my $name (keys %$attributes) {
                # Each entry is a list; only elements 1 and 2 are shown here.
                my (undef, $value, $original) = @{ $attributes->{$name} };
                printf "%s = %dH%s -- %s\n",
                    $name, length($value), underline($value), $original;
            }
        }

        if ($type == TmplTokenType::TEXT_PARAMETRIZED()) {
            printf "Form (c-format string):\n";
            printf "%dH%s\n", length($token->form), underline($token->form);
            printf "Parameters:\n";
            my $position = 0;
            for my $parameter ($token->parameters) {
                $position += 1;
                my $text = $parameter->string;
                printf "%%%d\$s = %dH%s\n",
                    $position, length($text), underline($text);
            }
        }

        if ($token->has_js_data) {
            printf "JavaScript translatable strings:\n";
            for my $js (@{ $token->js_data }) {
                next unless $js->[0]; # FIXME
                printf "%dH%s\n", length($js->[3]), underline($js->[3]);
            }
        }
    }
}
###############################################################################
# Collect the translatable strings of a template and print them, one per
# line, in sorted order on the currently selected output handle.
#
# Argument: the tokenizer handle passed on to TmplTokenizer::next_token().
#
# Strings are gathered from:
#   - TEXT tokens (trimmed);
#   - the "alt" and "title" attributes of any TAG token, "content" of <meta>
#     only, and "value" of <input> only unless its type is hidden or radio
#     (all trimmed);
#   - JavaScript strings attached to other tokens (element 3 of each js_data
#     entry whose element 0 is true).
# Duplicates are collapsed. Strings that TmplTokenizer::blank_p() reports as
# blank, and strings made only of digits, are not printed.
#
# Returns nothing.
sub text_extract ($) {
    my ($h) = @_;
    my %text = ();
    for (;;) {
        my $s = TmplTokenizer::next_token($h);
        last unless defined $s;
        my ($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
        if ($kind == TmplTokenType::TEXT()) {
            $t = TmplTokenizer::trim($t);
            $text{$t} = 1 if $t =~ /\S/s;
        } elsif ($kind == TmplTokenType::TAG() && %$attr) {
            # value [tag=input], meta
            # Always initialise $tag: "my $x = ... if COND" has undefined
            # behaviour and could leak the tag name of an earlier token.
            my $tag = $t =~ /^<(\S+)/s ? lc($1) : '';
            for my $name ('alt', 'content', 'title', 'value') {
                next unless $attr->{$name};
                next if $name eq 'content' && $tag ne 'meta';
                next if $name eq 'value' && ($tag ne 'input'
                    || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
                my (undef, $val) = @{ $attr->{$name} }; #FIXME
                $val = TmplTokenizer::trim($val);
                $text{$val} = 1 if $val =~ /\S/s;
            }
        } elsif ($s->has_js_data) {
            for my $js (@{ $s->js_data }) {
                # Record the string directly in %text; this script defines
                # no remember() helper, so calling one would die at run time.
                $text{ $js->[3] } = 1 if $js->[0]; # FIXME
            }
        }
    }
    # Emit all extracted strings.
    # Don't emit pure whitespace, pure numbers, or TMPL_VAR's.
    # Sorting makes the output reproducible from run to run.
    for my $string (sort keys %text) {
        printf "%s\n", $string
            unless TmplTokenizer::blank_p($string) || $string =~ /^\d+$/;
    }
    return;
}
###############################################################################
# Print the command-line help and terminate the program with the given
# exit status. The help goes to STDERR when the status is true (an error),
# and to STDOUT otherwise.
sub usage ($) {
    my ($status) = @_;
    my $out = *STDOUT;
    $out = *STDERR if $status;
    print {$out} <<EOF;
Usage: $0 [OPTIONS]
Extract strings from HTML file.
--debug-dump-only Do not extract strings; but display scanned tokens
-f, --file=FILE Extract from the specified FILE
--pedantic-warnings Issue warnings even for detected problems which
are likely to be harmless
--help Display this help and exit
EOF
    exit($status);
}
###############################################################################
# Report a command-line error on STDERR and terminate with exit(-1).
# The optional argument is a message printed on its own line before the
# pointer to --help; with no argument only the pointer is printed.
sub usage_error (;$) {
    if (@_) {
        print STDERR "$_[0]\n";
    }
    print STDERR "Try `$0 --help' for more information.\n";
    exit(-1);
}
###############################################################################
# Command-line entry point: parse the options, then either dump the token
# stream (--debug-dump-only) or print the extracted strings.
GetOptions(
    'enable-cformat' => \$allow_cformat_p,
    'f|file=s' => \$input,
    'debug-dump-only' => \$debug_dump_only_p,
    'pedantic-warnings' => sub { $pedantic_p = 1 },
    'help' => sub { usage(0) },
) or usage_error();
VerboseWarnings::set_application_name($0);

# Reject a missing -f before $input is handed to anything else.
usage_error('Missing mandatory option -f') unless defined $input;
VerboseWarnings::set_input_file_name($input);
VerboseWarnings::set_pedantic_mode($pedantic_p);

my $h = TmplTokenizer->new( $input );
$h->set_allow_cformat( 1 ) if $allow_cformat_p;
if ($debug_dump_only_p) {
    debug_dump( $h );
} else {
    text_extract( $h );
}
warn "This input will not work with Mozilla standards-compliant mode\n"
    if TmplTokenizer::syntaxerror_p();
# NOTE(review): INPUT is never opened in this script, so this close looks
# like a leftover; kept in case another module relies on it -- confirm and
# remove.
close INPUT;
# Signal failure to the caller when the tokenizer reported a fatal problem.
exit(-1) if TmplTokenizer::fatal_p();

3
misc/translator/xgettext.pl

@ -396,8 +396,7 @@ exit(-1) if TmplTokenizer::fatal_p;
=head1 DESCRIPTION
This is an experimental script based on the modularized
text-extract2.pl script. It has behaviour similar to
This script has behaviour similar to
xgettext(1), and generates gettext-compatible output files.
A gettext-like format provides the following advantages:

Loading…
Cancel
Save