Browse Source

Bug 6085 : Fixing missing last tag in translation and comments, utf-8 issues to fix now

Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>
3.4.x
Chris Cormack 13 years ago
parent
commit
312386be6e
  1. 62
      misc/translator/TTParser.pm
  2. 20
      misc/translator/TmplTokenizer.pm

62
misc/translator/TTParser.pm

@ -39,7 +39,10 @@ sub build_tokens{
$self->{filename} = $filename;
$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
$self->handler(end => "end", "self, line, tag, text"); #signature is end( self, linenumber, tagename, origional text )
$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
$self->handler(default => "default", "self, line, text, is_cdata"); # anything else
$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a TmplTokenType::CDATA
$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
$self->parse_file($filename);
@ -52,7 +55,9 @@ sub text{
my $line = shift;
my $work = shift; # original text
my $is_cdata = shift;
while($work){
# warn "in text line is $line work is $work";
# return if $work =~ m/^\s*$/;
# if there is a template_toolkit tag
if( $work =~ m/\[%.*?\]/ ){
@ -69,6 +74,7 @@ sub text{
#put work still to do back into work
$work = $' ? $' : 0;
} else {
# warn "in the text else work is now $work";
#If there is some left over work, treat it as text token
my $t = TmplToken->new( $work, ($is_cdata? TmplTokenType::CDATA : TmplTokenType::TEXT), $line, $self->{filename} );
push @tokens, $t;
@ -77,6 +83,37 @@ sub text{
}
}
# Handler for markup declarations.
# Receives the parser object, the line number, the original text and an
# is_cdata flag; wraps the text unchanged in a token (CDATA when the flag is
# set, TEXT otherwise) and appends it to @tokens.
sub declaration {
    my ( $self, $line, $work, $is_cdata ) = @_;

    # warn "declaration work is $work";
    my $type = $is_cdata ? TmplTokenType::CDATA : TmplTokenType::TEXT;
    my $token = TmplToken->new( $work, $type, $line, $self->{filename} );
    push @tokens, $token;
}
# Handler for comments.
# Receives the parser object, the line number, the original text and an
# is_cdata flag; the text is kept verbatim as a token (CDATA when the flag is
# set, TEXT otherwise) and appended to @tokens.
sub comment {
    my ( $self, $line, $work, $is_cdata ) = @_;

    # warn "comment work is $work";
    my $type = $is_cdata ? TmplTokenType::CDATA : TmplTokenType::TEXT;
    my $token = TmplToken->new( $work, $type, $line, $self->{filename} );
    push @tokens, $token;
}
# Catch-all handler for anything no other handler is registered for.
# Receives the parser object, the line number, the original text and an
# is_cdata flag; the text is kept verbatim as a token (CDATA when the flag is
# set, TEXT otherwise) and appended to @tokens.
sub default {
    my ( $self, $line, $work, $is_cdata ) = @_;

    # warn "default work is $work";
    my $type = $is_cdata ? TmplTokenType::CDATA : TmplTokenType::TEXT;
    my $token = TmplToken->new( $work, $type, $line, $self->{filename} );
    push @tokens, $token;
}
#handle opening html tags
sub start{
my $self = shift;
@ -84,7 +121,8 @@ sub start{
my $tag = shift;
my $hash = shift; #hash of attr/value pairs
my $text = shift; #origional text
#return if ! $interesting_tags{$tag};
# warn "in start text is $text";
# return if ! $interesting_tags{$tag};
# was $hash->{$key}
# print "#### " . $self->{filename} . " " . $tag . "####\n";
my $t = TmplToken->new( $text, TmplTokenType::TAG, $line, $self->{filename});
@ -100,12 +138,22 @@ sub start{
#handle closing html tags
#
# Registered with the argspec "self, line, tag, attr, text", so the arguments
# arrive in exactly that order:
#   $self - the parser object; its {filename} is recorded on the token
#   $line - line number reported by the parser for this tag
#   $tag  - tag name (not used below, but must be shifted to keep the
#           remaining arguments aligned with the argspec)
#   $hash - hashref of attribute name/value pairs; HTML::Parser documents
#           `attr` as undef for non-start events, so it is treated as optional
#   $text - original source text of the closing tag
#
# Builds a TmplTokenType::TAG token from the original text, attaches whatever
# attributes were supplied, and appends the token to @tokens.
sub end {
    my $self = shift;
    my $line = shift;
    my $tag  = shift;
    my $hash = shift;
    my $text = shift;

    # warn "in end text is $text";
    # what format should this be in?
    my $t = TmplToken->new( $text, TmplTokenType::TAG, $line, $self->{filename} );
    my %attr;

    # tags seem to be used in an 'interesting' way elsewhere..
    # Iterate over the KEYS only: flattening the hash with a bare %$hash would
    # also feed every value through the loop as if it were an attribute name.
    # The `|| {}` guards against an undefined attribute hash.
    for my $key ( keys %{ $hash || {} } ) {
        next unless defined $hash->{$key};
        $attr{ +lc($key) } = [ $key, $hash->{$key}, $key . "=" . $hash->{$key}, 0 ];
    }
    $t->set_attributes( \%attr );
    push @tokens, $t;
}
1;

20
misc/translator/TmplTokenizer.pm

@ -305,21 +305,33 @@ sub next_token {
# parts that make up a text_parametrized (future children of the token)
my @parts = ();
while(1){
# warn Dumper @parts;
$next = $self->{_parser}->next_token;
return undef unless defined $next;
if (! $next){
if (@parts){
return $self->_parametrize_internal(@parts);
}
else {
return undef;
}
}
# if cformat mode is off, dont bother parametrizing, just return them as they come
return $next unless $self->allow_cformat_p;
if( $next->type == TmplTokenType::TEXT ){
push @parts, $next;
} elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
}
elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
push @parts, $next;
} else{
}
else {
# if there is nothing in parts, return this token
return $next unless @parts;
return $next unless @parts;
# OTHERWISE, put this token back and return the parametrized string of @parts
$self->{_parser}->unshift_token($next);
return $self->_parametrize_internal(@parts);
}
}
}

Loading…
Cancel
Save