Some functions should not be in the module; these are now removed.
This commit is contained in:
parent
59d2e35180
commit
0b6030aecd
2 changed files with 2 additions and 97 deletions
|
@ -265,31 +265,6 @@ sub next_token (*) {
|
|||
|
||||
###############################################################################
|
||||
|
||||
sub debug_dump (*) { # for testing only
|
||||
my($h) = @_;
|
||||
print "re_tag_compat is /$re_tag_compat/\n";
|
||||
for (;;) {
|
||||
my $s = next_token $h;
|
||||
last unless defined $s;
|
||||
printf "%s\n", ('-' x 79);
|
||||
my($kind, $t, $attr) = @$s; # FIXME
|
||||
printf "%s:\n", $kind;
|
||||
printf "%4dH%s\n", length($t),
|
||||
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
|
||||
if ($kind eq KIND_TAG && %$attr) {
|
||||
printf "Attributes:\n";
|
||||
for my $a (keys %$attr) {
|
||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}};
|
||||
printf "%s = %dH%s -- %s\n", $a, length $val,
|
||||
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $val)),
|
||||
$val_orig;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
sub trim ($) {
|
||||
my($s) = @_;
|
||||
$s =~ s/^(?:\s|\ $re_end_entity)+//os;
|
||||
|
@ -299,67 +274,6 @@ sub trim ($) {
|
|||
|
||||
###############################################################################
|
||||
|
||||
sub text_extract (*) {
|
||||
my($h) = @_;
|
||||
my %text = ();
|
||||
for (;;) {
|
||||
my $s = next_token $h;
|
||||
last unless defined $s;
|
||||
my($kind, $t, $attr) = @$s; # FIXME
|
||||
if ($kind eq KIND_TEXT) {
|
||||
$t = trim $t;
|
||||
$text{$t} = 1 if $t =~ /\S/s;
|
||||
} elsif ($kind eq KIND_TAG && %$attr) {
|
||||
# value [tag=input], meta
|
||||
my $tag = lc($1) if $t =~ /^<(\S+)/s;
|
||||
for my $a ('alt', 'content', 'title', 'value') {
|
||||
if ($attr->{$a}) {
|
||||
next if $a eq 'content' && $tag ne 'meta';
|
||||
next if $a eq 'value' && ($tag ne 'input'
|
||||
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
|
||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
|
||||
$val = trim $val;
|
||||
$text{$val} = 1 if $val =~ /\S/s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# Emit all extracted strings.
|
||||
# Don't emit pure whitespace, pure numbers, or TMPL_VAR's.
|
||||
for my $t (keys %text) {
|
||||
printf "%s\n", $t
|
||||
unless $t =~ /^(?:\s|\ $re_end_entity|$re_tmpl_var)*$/os || $t =~ /^\d+$/;
|
||||
}
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
sub usage ($) {
|
||||
my($exitcode) = @_;
|
||||
my $h = $exitcode? *STDERR: *STDOUT;
|
||||
print $h <<EOF;
|
||||
Usage: $0 [OPTIONS]
|
||||
Extract strings from HTML file.
|
||||
|
||||
--debug-dump-only Do not extract strings; but display scanned tokens
|
||||
-f, --file=FILE Extract from the specified FILE
|
||||
--pedantic-warnings Issue warnings even for detected problems which
|
||||
are likely to be harmless
|
||||
--help Display this help and exit
|
||||
EOF
|
||||
exit($exitcode);
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
sub usage_error (;$) {
|
||||
print STDERR "$_[0]\n" if @_;
|
||||
print STDERR "Try `$0 --help' for more information.\n";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
=head1 FUTURE PLANS
|
||||
|
||||
Code could be written to detect template variables and
|
||||
|
|
|
@ -50,15 +50,6 @@ sub debug_dump (*) { # for testing only
|
|||
|
||||
###############################################################################
|
||||
|
||||
sub trim ($) {
|
||||
my($s) = @_;
|
||||
$s =~ s/^(?:\s|\ $TmplTokenizer::re_end_entity)+//os;
|
||||
$s =~ s/(?:\s|\ $TmplTokenizer::re_end_entity)+$//os;
|
||||
return $s;
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
|
||||
sub text_extract (*) {
|
||||
my($h) = @_;
|
||||
my %text = ();
|
||||
|
@ -67,7 +58,7 @@ sub text_extract (*) {
|
|||
last unless defined $s;
|
||||
my($kind, $t, $attr) = @$s; # FIXME
|
||||
if ($kind eq TmplTokenizer::KIND_TEXT) {
|
||||
$t = trim $t;
|
||||
$t = TmplTokenizer::trim $t;
|
||||
$text{$t} = 1 if $t =~ /\S/s;
|
||||
} elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
|
||||
# value [tag=input], meta
|
||||
|
@ -78,7 +69,7 @@ sub text_extract (*) {
|
|||
next if $a eq 'value' && ($tag ne 'input'
|
||||
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
|
||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
|
||||
$val = trim $val;
|
||||
$val = TmplTokenizer::trim $val;
|
||||
$text{$val} = 1 if $val =~ /\S/s;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue