Frédéric Demians
a41100a445
This test validate Template Toolkit (TT) Koha files. For the time being an unique validation is done: Test if TT files contain TT directive within HTML tag. For example: <li[% IF This kind of constuction MUST be avoided because it break Koha translation process. This patch transform also translation specific modules into C4 modules in order to be able to use them in test case: C4::TTPaser C4::TmplToken C4::TmplTokenType This patch is a Perl adaptation of a Haskell script from Frère Sébastien Marie. Signed-off-by: Katrin Fischer <Katrin.Fischer.83@web.de> Notes on testing: - translate install de-DE - worked ok - translate update de-DE > translate install de-DE - worked ok - running the test xt/tt_valid.t - worked ok and pointed out lots of problems. Found no problems. Signed-off-by: Katrin Fischer <Katrin.Fischer.83@web.de> Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>
153 lines
5.1 KiB
Perl
Executable file
153 lines
5.1 KiB
Perl
Executable file
#!/usr/bin/env perl
|
|
#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
|
|
package C4::TTParser;
|
|
use base qw(HTML::Parser);
|
|
use C4::TmplToken;
|
|
use strict;
|
|
use warnings;
|
|
|
|
#seems to be handled post tokenizer
|
|
##hash where key is tag we are interested in and the value is a hash of the attributes we want
|
|
#my %interesting_tags = (
|
|
# img => { alt => 1 },
|
|
#);
|
|
|
|
#tokens found so far (used like a stack)
|
|
my ( @tokens );
|
|
|
|
#shiftnext token or undef
|
|
sub next_token{
|
|
return shift @tokens;
|
|
}
|
|
|
|
#unshift token back on @tokens
|
|
sub unshift_token{
|
|
my $self = shift;
|
|
unshift @tokens, shift;
|
|
}
|
|
|
|
#have a peep at next token
|
|
sub peep_token{
|
|
return $tokens[0];
|
|
}
|
|
|
|
#wrapper for parse
|
|
#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
|
|
#signature build_tokens( self, filename)
|
|
sub build_tokens{
|
|
my ($self, $filename) = @_;
|
|
$self->{filename} = $filename;
|
|
$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
|
|
$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
|
|
$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
|
|
$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
|
|
$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
|
|
# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
|
|
$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
|
|
$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
|
|
$self->parse_file($filename);
|
|
return $self;
|
|
}
|
|
|
|
#handle parsing of text
|
|
sub text{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; # original text
|
|
my $is_cdata = shift;
|
|
while($work){
|
|
# if there is a template_toolkit tag
|
|
if( $work =~ m/\[%.*?\]/ ){
|
|
#everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
|
|
if( $` ){
|
|
my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
#the match itself is a DIRECTIVE $&
|
|
my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
|
|
# put work still to do back into work
|
|
$work = $' ? $' : 0;
|
|
} else {
|
|
# If there is some left over work, treat it as text token
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
|
|
push @tokens, $t;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub declaration {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
sub comment {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
sub default {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
|
|
#handle opening html tags
|
|
sub start{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $tag = shift;
|
|
my $hash = shift; #hash of attr/value pairs
|
|
my $text = shift; #origional text
|
|
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
|
|
my %attr;
|
|
# tags seem to be uses in an 'interesting' way elsewhere..
|
|
for my $key( %$hash ) {
|
|
next unless defined $hash->{$key};
|
|
if ($key eq "/"){
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
|
|
}
|
|
else {
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
|
|
}
|
|
}
|
|
$t->set_attributes( \%attr );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
#handle closing html tags
|
|
sub end{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $tag = shift;
|
|
my $hash = shift;
|
|
my $text = shift;
|
|
# what format should this be in?
|
|
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
|
|
my %attr;
|
|
# tags seem to be uses in an 'interesting' way elsewhere..
|
|
for my $key( %$hash ) {
|
|
next unless defined $hash->{$key};
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
|
|
}
|
|
$t->set_attributes( \%attr );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
1;
|