Some functions should not be in the module; these are now removed.
This commit is contained in:
parent
59d2e35180
commit
0b6030aecd
2 changed files with 2 additions and 97 deletions
|
@ -265,31 +265,6 @@ sub next_token (*) {
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
sub debug_dump (*) { # for testing only
|
|
||||||
my($h) = @_;
|
|
||||||
print "re_tag_compat is /$re_tag_compat/\n";
|
|
||||||
for (;;) {
|
|
||||||
my $s = next_token $h;
|
|
||||||
last unless defined $s;
|
|
||||||
printf "%s\n", ('-' x 79);
|
|
||||||
my($kind, $t, $attr) = @$s; # FIXME
|
|
||||||
printf "%s:\n", $kind;
|
|
||||||
printf "%4dH%s\n", length($t),
|
|
||||||
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
|
|
||||||
if ($kind eq KIND_TAG && %$attr) {
|
|
||||||
printf "Attributes:\n";
|
|
||||||
for my $a (keys %$attr) {
|
|
||||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}};
|
|
||||||
printf "%s = %dH%s -- %s\n", $a, length $val,
|
|
||||||
join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $val)),
|
|
||||||
$val_orig;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
sub trim ($) {
|
sub trim ($) {
|
||||||
my($s) = @_;
|
my($s) = @_;
|
||||||
$s =~ s/^(?:\s|\ $re_end_entity)+//os;
|
$s =~ s/^(?:\s|\ $re_end_entity)+//os;
|
||||||
|
@ -299,67 +274,6 @@ sub trim ($) {
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
sub text_extract (*) {
|
|
||||||
my($h) = @_;
|
|
||||||
my %text = ();
|
|
||||||
for (;;) {
|
|
||||||
my $s = next_token $h;
|
|
||||||
last unless defined $s;
|
|
||||||
my($kind, $t, $attr) = @$s; # FIXME
|
|
||||||
if ($kind eq KIND_TEXT) {
|
|
||||||
$t = trim $t;
|
|
||||||
$text{$t} = 1 if $t =~ /\S/s;
|
|
||||||
} elsif ($kind eq KIND_TAG && %$attr) {
|
|
||||||
# value [tag=input], meta
|
|
||||||
my $tag = lc($1) if $t =~ /^<(\S+)/s;
|
|
||||||
for my $a ('alt', 'content', 'title', 'value') {
|
|
||||||
if ($attr->{$a}) {
|
|
||||||
next if $a eq 'content' && $tag ne 'meta';
|
|
||||||
next if $a eq 'value' && ($tag ne 'input'
|
|
||||||
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
|
|
||||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
|
|
||||||
$val = trim $val;
|
|
||||||
$text{$val} = 1 if $val =~ /\S/s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# Emit all extracted strings.
|
|
||||||
# Don't emit pure whitespace, pure numbers, or TMPL_VAR's.
|
|
||||||
for my $t (keys %text) {
|
|
||||||
printf "%s\n", $t
|
|
||||||
unless $t =~ /^(?:\s|\ $re_end_entity|$re_tmpl_var)*$/os || $t =~ /^\d+$/;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
sub usage ($) {
|
|
||||||
my($exitcode) = @_;
|
|
||||||
my $h = $exitcode? *STDERR: *STDOUT;
|
|
||||||
print $h <<EOF;
|
|
||||||
Usage: $0 [OPTIONS]
|
|
||||||
Extract strings from HTML file.
|
|
||||||
|
|
||||||
--debug-dump-only Do not extract strings; but display scanned tokens
|
|
||||||
-f, --file=FILE Extract from the specified FILE
|
|
||||||
--pedantic-warnings Issue warnings even for detected problems which
|
|
||||||
are likely to be harmless
|
|
||||||
--help Display this help and exit
|
|
||||||
EOF
|
|
||||||
exit($exitcode);
|
|
||||||
}
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
sub usage_error (;$) {
|
|
||||||
print STDERR "$_[0]\n" if @_;
|
|
||||||
print STDERR "Try `$0 --help' for more information.\n";
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
=head1 FUTURE PLANS
|
=head1 FUTURE PLANS
|
||||||
|
|
||||||
Code could be written to detect template variables and
|
Code could be written to detect template variables and
|
||||||
|
|
|
@ -50,15 +50,6 @@ sub debug_dump (*) { # for testing only
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
sub trim ($) {
|
|
||||||
my($s) = @_;
|
|
||||||
$s =~ s/^(?:\s|\ $TmplTokenizer::re_end_entity)+//os;
|
|
||||||
$s =~ s/(?:\s|\ $TmplTokenizer::re_end_entity)+$//os;
|
|
||||||
return $s;
|
|
||||||
}
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
sub text_extract (*) {
|
sub text_extract (*) {
|
||||||
my($h) = @_;
|
my($h) = @_;
|
||||||
my %text = ();
|
my %text = ();
|
||||||
|
@ -67,7 +58,7 @@ sub text_extract (*) {
|
||||||
last unless defined $s;
|
last unless defined $s;
|
||||||
my($kind, $t, $attr) = @$s; # FIXME
|
my($kind, $t, $attr) = @$s; # FIXME
|
||||||
if ($kind eq TmplTokenizer::KIND_TEXT) {
|
if ($kind eq TmplTokenizer::KIND_TEXT) {
|
||||||
$t = trim $t;
|
$t = TmplTokenizer::trim $t;
|
||||||
$text{$t} = 1 if $t =~ /\S/s;
|
$text{$t} = 1 if $t =~ /\S/s;
|
||||||
} elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
|
} elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
|
||||||
# value [tag=input], meta
|
# value [tag=input], meta
|
||||||
|
@ -78,7 +69,7 @@ sub text_extract (*) {
|
||||||
next if $a eq 'value' && ($tag ne 'input'
|
next if $a eq 'value' && ($tag ne 'input'
|
||||||
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
|
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
|
||||||
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
|
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
|
||||||
$val = trim $val;
|
$val = TmplTokenizer::trim $val;
|
||||||
$text{$val} = 1 if $val =~ /\S/s;
|
$text{$val} = 1 if $val =~ /\S/s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue