Bug 21481: Remove obsolete files from translation toolkit

Signed-off-by: Owen Leonard <oleonard@myacpl.org>
Signed-off-by: Julian Maurice <julian.maurice@biblibre.com>

Conflicts:
	misc/translator/text-extract.pl
	misc/translator/text-extract2.pl

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
6 years ago · 6e499dd567
4 changed files with 1 additions and 218 deletions
--- a/misc/translator/Makefile
+++ b/misc/translator/Makefile
@ -1,7 +0,0 @@
 # Build the plain-text translator documentation from its HTML source.
 # "all" and "clean" name actions, not files, so declare them phony; otherwise
 # a stray file called "all" or "clean" would stop the rule from running.
 .PHONY: all clean
 all: translator_doc.txt
 # Render X.html to X.txt at 80 columns with w3m.
 #  - the first sed wraps the body of every <blockquote> in a one-cell table
 #    holding a <pre>;
 #  - the last sed strips trailing spaces ($$ is make's escape for a literal $);
 #  - cat -s squeezes runs of blank lines.
 %.txt: %.html
 	set -x; sed -e 's/<blockquote>/&<table border=0 width="100%"><tr><td nowrap><pre>/g' -e 's/<\/blockquote>/<\/pre><\/table>&/g' $<|LANG=C w3m -T text/html -cols 80 | sed -e 's/  *$$//g' | cat -s > $@
 # Remove the generated file; -f keeps "make clean" from failing when the
 # file has not been built yet.
 clean:
 	rm -f translator_doc.txt
--- a/misc/translator/text-extract.pl
+++ b/misc/translator/text-extract.pl
@ -1,39 +0,0 @@
 #!/usr/bin/perl
 #use strict;
 #use warnings; FIXME - Bug 2505
 use FindBin;
 use lib $FindBin::Bin;
 use HTML::Tree;
 use Getopt::Std;
 # Command line: -f FILE names the HTML file to extract strings from.
 # getopts() (rather than getopt()) is used so that "f:" is read as
 # "-f takes an argument" and unknown switches are reported.
 my %opts;
 getopts("f:", \%opts) or die "Usage: $0 -f FILE\n";
 die "Usage: $0 -f FILE\n" unless defined $opts{f};
 # Fail loudly instead of silently producing no output for a bad path.
 die "$0: cannot read $opts{f}\n" unless -r $opts{f};
 	my $tree = HTML::TreeBuilder->new; # empty tree
 	$tree->parse_file($opts{f});
 	# give_id($node)
 	# Walk the children of $node depth-first and print, one per line:
 	#   - every text node;
 	#   - the 'alt' and 'title' attributes of any element (when true);
 	#   - the 'value' attribute of <input> elements (when true);
 	#   - the 'content' attribute of <meta> elements (when defined).
 	# Comments, <script> and <style> subtrees are skipped entirely.
 	sub give_id {
 		my ($node) = @_;
 		foreach my $c ($node->content_list) {
 			# Plain strings are text nodes: print them, nothing to recurse into.
 			if (!ref($c)) {
 				print "$c\n";
 				next;
 			}
 			my $tag = $c->tag();
 			next if $tag eq "~comment" || $tag eq "script" || $tag eq "style";
 			for my $name ('alt', 'title') {
 				my $val = $c->attr($name);
 				print $val."\n" if $val;
 			}
 			if ($tag eq "input" && $c->attr('value')) {
 				print $c->attr('value')."\n";
 			}
 			if ($tag eq 'meta') {
 				# Not every <meta> carries a content attribute; skip it rather
 				# than print an empty line. Terminate with a bare newline so
 				# the next extracted string does not gain a leading space.
 				my $content = $c->attr('content');
 				print $content."\n" if defined $content;
 			}
 			give_id($c);
 		}
 	}
 	give_id($tree);
 	$tree = $tree->delete;
--- a/misc/translator/text-extract2.pl
+++ b/misc/translator/text-extract2.pl
@ -1,170 +0,0 @@
 #!/usr/bin/perl
 # Test filter partially based on Ambrose's hideous subst.pl code
 # The idea is that the .tt files are not valid HTML, and as a result
 # HTML::Parse would be completely confused by these templates.
 # This is just a simple scanner (not a parser) & should give better results.
 # This script is meant to be a drop-in replacement of text-extract.pl
 # A grander plan: Code could be written to detect template variables and
 # construct gettext-c-format-string-like meta-strings (e.g., "Results %s
 # through %s of %s records" that will be more likely to be translatable
 # to languages where word order is very unlike English word order.
 # --> This will be relatively major rework, and requires corresponding
 # rework in tmpl_process.pl
 use FindBin;
 use lib $FindBin::Bin;
 use strict;
 #use warnings; FIXME - Bug 2505
 use Getopt::Long;
 use TmplTokenizer;
 use VerboseWarnings;
 use vars qw( $input );
 use vars qw( $debug_dump_only_p );
 use vars qw( $pedantic_p );
 use vars qw( $allow_cformat_p ); # FOR TESTING PURPOSES ONLY!!
 ###############################################################################
 # underline($s): return $s with every character outside the range \0-\37
 # rewritten as "char, backspace, char"; characters inside that range are
 # copied through unchanged. For testing only.
 sub underline ($) {
     my ($text) = @_;
     my $out = '';
     for my $ch (split //, $text) {
         if ($ch =~ /[\0-\37]/) {
             $out .= $ch;
         }
         else {
             $out .= "$ch\b$ch";
         }
     }
     return $out;
 }
 # debug_dump($h): print a human-readable dump of every token obtained from
 # TmplTokenizer::next_token($h) until it returns undef. For each token the
 # type, line number and string are shown, followed (where applicable) by its
 # attributes, its parametrized form and parameters, and its JavaScript
 # strings. For testing only.
 sub debug_dump ($) {
     my ($tokenizer) = @_;
     print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
     while (defined(my $s = TmplTokenizer::next_token($tokenizer))) {
         printf "%s\n", '-' x 79;
         my ($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
         printf "%s [line %d]:\n", $kind->to_string, $s->line_number;
         printf "%4dH%s\n", length($t), underline($t);

         # Tag tokens with a non-empty attribute hash: list each attribute.
         if ($kind == TmplTokenType::TAG() && %$attr) {
             printf "Attributes:\n";
             for my $name (keys %$attr) {
                 # Each entry is an array ref; only slots 1 and 2 are shown.
                 my (undef, $val, $val_orig) = @{ $attr->{$name} };
                 printf "%s = %dH%s -- %s\n",
                     $name, length($val), underline($val), $val_orig;
             }
         }

         # Parametrized text: show the form string and numbered parameters.
         if ($kind == TmplTokenType::TEXT_PARAMETRIZED()) {
             printf "Form (c-format string):\n";
             printf "%dH%s\n", length($s->form), underline($s->form);
             printf "Parameters:\n";
             my $n = 1;
             for my $param ($s->parameters) {
                 my $str = $param->string;
                 printf "%%%d\$s = %dH%s\n", $n, length($str), underline($str);
                 $n++;
             }
         }

         if ($s->has_js_data) {
             printf "JavaScript translatable strings:\n";
             for my $js (@{ $s->js_data }) {
                 next unless $js->[0]; # FIXME
                 printf "%dH%s\n", length($js->[3]), underline($js->[3]);
             }
         }
     }
 }
 ###############################################################################
 # text_extract($h): read every token from TmplTokenizer::next_token($h) and
 # print each distinct extracted string once, one per line.
 #
 # Strings are collected from:
 #   - TEXT tokens (trimmed, kept only if they contain a non-space character);
 #   - TAG tokens: the 'alt' and 'title' attributes of any tag, 'content' of
 #     <meta>, and 'value' of <input> unless its type is hidden or radio;
 #   - tokens with JavaScript data: slot 3 of each entry whose slot 0 is true.
 #
 # Strings that TmplTokenizer::blank_p() accepts, or that consist only of
 # digits, are not printed. Output is sorted so repeated runs are comparable.
 sub text_extract ($) {
    my($h) = @_;
    my %text = ();
    while (defined(my $s = TmplTokenizer::next_token($h))) {
 	my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
 	if ($kind == TmplTokenType::TEXT()) {
 	    $t = TmplTokenizer::trim($t);
 	    $text{$t} = 1 if $t =~ /\S/s;
 	} elsif ($kind == TmplTokenType::TAG() && %$attr) {
 	    # value [tag=input], meta
 	    # Avoid "my $x = ... if ...": its behaviour is undefined in Perl.
 	    my $tag = $t =~ /^<(\S+)/s ? lc($1) : '';
 	    for my $a ('alt', 'content', 'title', 'value') {
 		next unless $attr->{$a};
 		next if $a eq 'content' && $tag ne 'meta';
 		next if $a eq 'value' && ($tag ne 'input'
 		    || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
 		# Attribute entries are array refs; slot 1 is the value used here.
 		my $val = TmplTokenizer::trim($attr->{$a}->[1]);
 		$text{$val} = 1 if $val =~ /\S/s;
 	    }
 	} elsif ($s->has_js_data) {
 	    for my $js (@{$s->js_data}) {
 		# Record the string directly in %text. The previous call to
 		# remember() referred to a sub that this script never defines.
 		$text{$js->[3]} = 1 if $js->[0]; # FIXME
 	    }
 	}
    }
    # Emit all extracted strings.
    # Don't emit pure whitespace, pure numbers, or TMPL_VAR's.
    for my $str (sort keys %text) {
 	next if TmplTokenizer::blank_p($str) || $str =~ /^\d+$/;
 	printf "%s\n", $str;
    }
 }
 ###############################################################################
 # usage($exitcode): print the help text and exit with $exitcode.
 # The text goes to STDOUT when $exitcode is false, otherwise to STDERR.
 sub usage ($) {
    my ($exitcode) = @_;
    my $out = *STDOUT;
    $out = *STDERR if $exitcode;
    print $out <<EOF;
 Usage: $0 [OPTIONS]
 Extract strings from HTML file.
      --debug-dump-only     Do not extract strings; but display scanned tokens
  -f, --file=FILE           Extract from the specified FILE
      --pedantic-warnings   Issue warnings even for detected problems which
 			    are likely to be harmless
      --help                Display this help and exit
 EOF
    exit($exitcode);
 }
 ###############################################################################
 # usage_error([$message]): print the optional message and a pointer to
 # --help on STDERR, then exit with status -1.
 sub usage_error (;$) {
    if (@_) {
        my $message = $_[0];
        print STDERR "$message\n";
    }
    print STDERR "Try `$0 --help' for more information.\n";
    exit(-1);
 }
 ###############################################################################
 # Main program: parse options, then either dump the token stream or extract
 # the translatable strings from the file given with -f.
 GetOptions(
    'enable-cformat'	=> \$allow_cformat_p,
    'f|file=s'		=> \$input,
    'debug-dump-only'	=> \$debug_dump_only_p,
    'pedantic-warnings'	=> sub { $pedantic_p = 1 },
    'help'		=> sub { usage(0) },
 ) or usage_error();
 # -f is mandatory: reject a missing value before $input is handed to
 # VerboseWarnings or the tokenizer.
 usage_error('Missing mandatory option -f') unless defined $input;
 VerboseWarnings::set_application_name($0);
 VerboseWarnings::set_input_file_name($input);
 VerboseWarnings::set_pedantic_mode($pedantic_p);
 my $h = TmplTokenizer->new( $input );
 $h->set_allow_cformat( 1 ) if $allow_cformat_p;
 if ($debug_dump_only_p) {
    debug_dump( $h );
 } else {
    text_extract( $h );
 }
 warn "This input will not work with Mozilla standards-compliant mode\n"
 	if TmplTokenizer::syntaxerror_p();
 # NOTE(review): the former "close INPUT" was dropped because this script
 # never opens a handle named INPUT; confirm nothing else relies on it.
 exit(-1) if TmplTokenizer::fatal_p();
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@ -396,8 +396,7 @@ exit(-1) if TmplTokenizer::fatal_p;
 =head1 DESCRIPTION
-This is an experimental script based on the modularized
+This script has behaviour similar to
 text-extract2.pl script.  It has behaviour similar to
 xgettext(1), and generates gettext-compatible output files.
 A gettext-like format provides the following advantages: