text_extract is deprecated. Replaced by text_extract3
This commit is contained in:
parent
eb25e7e691
commit
ee11fc4616
1 changed files with 0 additions and 33 deletions
|
@ -1,33 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
use HTML::Tree;
|
||||
use Getopt::Std;
|
||||
# Command-line handling: the HTML file to process is passed as -f <file>.
# getopts() is the Getopt::Std function that understands the "f:" spec
# (the colon marks -f as taking an argument); getopt() would treat the
# colon itself as a switch name.  On success it populates $opt_f.
our $opt_f;
getopts("f:")
    or die "Usage: $0 -f <html-file>\n";
defined $opt_f && length $opt_f
    or die "Usage: $0 -f <html-file>\n";

my $tree = HTML::TreeBuilder->new; # empty tree

# parse_file() yields undef when the file cannot be opened; stop with a
# diagnostic instead of silently extracting nothing from an empty tree.
defined $tree->parse_file($opt_f)
    or die "Cannot parse '$opt_f': $!\n";
|
||||
# give_id($node)
#
# Walk the children of $node depth-first and print, one item per line,
# the text a reader could see or that describes the page:
#   * every plain text child (children that are not references),
#   * the "alt" and "title" attributes of any element,
#   * the "value" attribute of <input> elements,
#   * the "content" attribute of <meta> elements.
# Children tagged "~comment", "script" or "style" are skipped together
# with everything beneath them.
#
# $node must provide content_list(); element children must provide tag()
# and attr().  Output goes to the currently selected filehandle.
# Returns nothing.
sub give_id {
    my ($node) = @_;

    foreach my $child ($node->content_list) {

        # Text nodes are plain strings, not objects: print and move on.
        if (!ref $child) {
            print "$child\n";
            next;
        }

        my $tag = $child->tag();
        next if $tag eq "~comment" || $tag eq "script" || $tag eq "style";

        # Attribute values are tested with defined/length rather than
        # truthiness so that a legitimate value such as "0" is still
        # printed, while missing or empty attributes are skipped.
        foreach my $name ('alt', 'title') {
            my $text = $child->attr($name);
            print "$text\n" if defined $text && length $text;
        }

        if ($tag eq "input") {
            my $value = $child->attr('value');
            print "$value\n" if defined $value && length $value;
        }

        if ($tag eq 'meta') {
            # A <meta> without a content attribute contributes nothing
            # (previously it produced a blank line and an undef warning).
            my $content = $child->attr('content');
            print "$content\n" if defined $content && length $content;
        }

        give_id($child);
    }

    return;
}
|
||||
# Print all extractable text, starting from the root of the parsed document.
give_id($tree);

# Destroy the tree explicitly once the output has been written, then clear
# the variable so no handle to the destroyed tree remains.
$tree->delete;
undef $tree;
|
Loading…
Reference in a new issue