#!/usr/bin/perl
#
# text-extract.pl - print the human-readable strings found in an HTML file.
#
# Usage: text-extract.pl -f FILE
#
# Parses FILE with HTML::TreeBuilder, walks the resulting tree and prints,
# one string per line:
#   * every text node,
#   * the "alt" and "title" attributes of any element,
#   * the "value" attribute of <input> elements,
#   * the "content" attribute of <meta> elements.
# Comment pseudo-elements and <script>/<style> subtrees are skipped.
#
# Provenance: added as misc/translator/text-extract.pl in commit
# cef8798f7c4ab7f6ebd029d2967b6901b8668c7a (tipaul, 2004-12-08).

use strict;
use warnings;

use Getopt::Std;
use HTML::TreeBuilder;    # the class actually instantiated below

# Tags whose whole subtree is ignored: nothing below them is printed.
my %SKIPPED_TAG = map { $_ => 1 } ( '~comment', 'script', 'style' );

# Print the value of attribute $name of $element followed by a newline.
# Nothing is printed when the attribute is absent or empty, so that no
# blank lines (or "uninitialized value" warnings) are produced.
sub _print_attr {
    my ( $element, $name ) = @_;

    my $text = $element->attr($name);
    print "$text\n" if defined $text && length $text;

    return;
}

# Recursively print the extractable strings found below $node.
#
# Parameters:
#   $node - an element object providing content_list(); its children are
#           either plain strings (text nodes) or element objects providing
#           tag() and attr().
#
# Only the children of $node are inspected; the attributes of $node itself
# are never printed. For each child element the output order is: alt,
# title, value (<input> only), content (<meta> only), then the child's own
# descendants.
sub give_id {
    my ($node) = @_;

    for my $child ( $node->content_list ) {

        # Text nodes are plain (non-reference) scalars: print and move on.
        if ( !ref $child ) {
            print "$child\n";
            next;
        }

        my $tag = $child->tag;
        next if $SKIPPED_TAG{$tag};

        _print_attr( $child, 'alt' );
        _print_attr( $child, 'title' );
        _print_attr( $child, 'value' )   if $tag eq 'input';
        _print_attr( $child, 'content' ) if $tag eq 'meta';

        give_id($child);
    }

    return;
}

# "f:" is getopts() syntax for "-f takes an argument"; the options are
# collected in a lexical hash instead of the implicit $opt_f global.
my %opts;
getopts( 'f:', \%opts )
    or die "Usage: $0 -f FILE\n";

my $file = $opts{f};
die "Usage: $0 -f FILE\n"
    unless defined $file && length $file;

my $tree = HTML::TreeBuilder->new;    # empty tree

# parse_file() returns undef (with the reason in $!) when FILE cannot be
# opened; fail loudly rather than silently printing nothing.
defined $tree->parse_file($file)
    or die "$0: cannot parse '$file': $!\n";

give_id($tree);

# Explicitly release the tree once the walk is done.
$tree = $tree->delete;