Koha/C4/TTParser.pm
Frédéric Demians a41100a445 Bug 6458 Template Toolkit files test case
This test validate Template Toolkit (TT) Koha files.

For the time being an unique validation is done: Test if TT files
contain TT directive within HTML tag. For example:

  <li[% IF

This kind of constuction MUST be avoided because it break Koha
translation process.

This patch transform also translation specific modules into C4 modules
in order to be able to use them in test case:

  C4::TTPaser
  C4::TmplToken
  C4::TmplTokenType

This patch is a Perl adaptation of a Haskell script from Frère Sébastien
Marie.

Signed-off-by: Katrin Fischer <Katrin.Fischer.83@web.de>
Notes on testing:
- translate install de-DE - worked ok
- translate update de-DE > translate install de-DE - worked ok
- running the test xt/tt_valid.t - worked ok and pointed out lots of problems.
Found no problems.

Signed-off-by: Katrin Fischer <Katrin.Fischer.83@web.de>
Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>
2011-08-18 21:11:50 +12:00

153 lines
5.1 KiB
Perl
Executable file

#!/usr/bin/env perl
#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
package C4::TTParser;
use base qw(HTML::Parser);
use C4::TmplToken;
use strict;
use warnings;
#seems to be handled post tokenizer
##hash where key is tag we are interested in and the value is a hash of the attributes we want
#my %interesting_tags = (
# img => { alt => 1 },
#);
#tokens found so far (used like a stack)
my ( @tokens );
#shiftnext token or undef
sub next_token{
return shift @tokens;
}
#unshift token back on @tokens
sub unshift_token{
my $self = shift;
unshift @tokens, shift;
}
#have a peep at next token
sub peep_token{
return $tokens[0];
}
#wrapper for parse
#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
#signature build_tokens( self, filename)
sub build_tokens{
my ($self, $filename) = @_;
$self->{filename} = $filename;
$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
$self->parse_file($filename);
return $self;
}
#handle parsing of text
sub text{
my $self = shift;
my $line = shift;
my $work = shift; # original text
my $is_cdata = shift;
while($work){
# if there is a template_toolkit tag
if( $work =~ m/\[%.*?\]/ ){
#everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
if( $` ){
my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
#the match itself is a DIRECTIVE $&
my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
push @tokens, $t;
# put work still to do back into work
$work = $' ? $' : 0;
} else {
# If there is some left over work, treat it as text token
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
last;
}
}
}
sub declaration {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
sub comment {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
sub default {
my $self = shift;
my $line = shift;
my $work = shift; #original text
my $is_cdata = shift;
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
}
#handle opening html tags
sub start{
my $self = shift;
my $line = shift;
my $tag = shift;
my $hash = shift; #hash of attr/value pairs
my $text = shift; #origional text
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
my %attr;
# tags seem to be uses in an 'interesting' way elsewhere..
for my $key( %$hash ) {
next unless defined $hash->{$key};
if ($key eq "/"){
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
}
else {
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
}
}
$t->set_attributes( \%attr );
push @tokens, $t;
}
#handle closing html tags
sub end{
my $self = shift;
my $line = shift;
my $tag = shift;
my $hash = shift;
my $text = shift;
# what format should this be in?
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
my %attr;
# tags seem to be uses in an 'interesting' way elsewhere..
for my $key( %$hash ) {
next unless defined $hash->{$key};
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
}
$t->set_attributes( \%attr );
push @tokens, $t;
}
1;