Bug 6085 : Fixing missing last tag in translation and comments, utf-8 issues to fix now

Signed-off-by: Chris Cormack <chrisc@catalyst.net.nz>
13 years ago · 312386be6e
2 changed files with 71 additions and 11 deletions
--- a/misc/translator/TTParser.pm
+++ b/misc/translator/TTParser.pm
@ -39,7 +39,10 @@ sub build_tokens{
    $self->{filename} = $filename;
    $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
    $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
-    $self->handler(end => "end", "self, line, tag, text"); #signature is end( self, linenumber, tagename, origional text )
+    $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
+    $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
+    $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
+    $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
    $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a TmplTokenType::CDATA
    $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
    $self->parse_file($filename);
@ -52,7 +55,9 @@ sub text{
    my $line = shift;
    my $work = shift; # original text
    my $is_cdata = shift;
+
    while($work){
+#            warn "in text line is $line work is $work";
 #        return if $work =~ m/^\s*$/;
        # if there is a template_toolkit tag
        if( $work =~ m/\[%.*?\]/ ){
@ -69,6 +74,7 @@ sub text{
            #put work still to do back into work
            $work = $' ? $' : 0;
        } else {
+#            warn "in the text else work is now $work";
            #If there is some left over work, treat it as text token
            my $t = TmplToken->new( $work, ($is_cdata? TmplTokenType::CDATA : TmplTokenType::TEXT), $line, $self->{filename} );
            push @tokens, $t;
@ -77,6 +83,37 @@ sub text{
    }
 }

+sub declaration {
+    my $self = shift;
+    my $line = shift;
+    my $work = shift; #original text
+    my $is_cdata = shift;
+#      warn "declaration work is $work";
+    my $t = TmplToken->new( $work, ($is_cdata? TmplTokenType::CDATA : TmplTokenType::TEXT), $line, $self->{filename} );
+    push @tokens, $t;  
+}      
+
+sub comment {
+    my $self = shift;
+    my $line = shift;
+    my $work = shift; #original text
+    my $is_cdata = shift;
+#      warn "comment work is $work";
+    my $t = TmplToken->new( $work, ($is_cdata? TmplTokenType::CDATA : TmplTokenType::TEXT), $line, $self->{filename} );
+    push @tokens, $t;  
+}      
+
+sub default {
+    my $self = shift;
+    my $line = shift;
+    my $work = shift; #original text
+    my $is_cdata = shift;
+#      warn "comment work is $work";
+    my $t = TmplToken->new( $work, ($is_cdata? TmplTokenType::CDATA : TmplTokenType::TEXT), $line, $self->{filename} );
+    push @tokens, $t;  
+}      
+
+
 #handle opening html tags
 sub start{
    my $self = shift;
@ -84,7 +121,8 @@ sub start{
    my $tag = shift;
    my $hash = shift; #hash of attr/value pairs
    my $text = shift; #origional text
-    #return if ! $interesting_tags{$tag};
+#      warn "in start text is $text";
+    # return if ! $interesting_tags{$tag};
    # was $hash->{$key}
    # print "#### " . $self->{filename}  . " " . $tag . "####\n";
    my $t = TmplToken->new( $text, TmplTokenType::TAG, $line, $self->{filename});
@ -100,12 +138,22 @@ sub start{

 #handle closing html tags
 sub end{
-  my $self = shift;
-  my $line = shift;
-  my $tag = shift;
-  my $text = shift;
+    my $self = shift;
+    my $line = shift;
+    my $tag = shift;
+    my $hash = shift;
+    my $text = shift;
+#  warn "in end text is $text";  
  # what format should this be in?
-  my $t = TmplToken->new( $text, TmplTokenType::TAG, $line, $self->{filename} );
+    my $t = TmplToken->new( $text, TmplTokenType::TAG, $line, $self->{filename} );
+    my %attr;
+    # tags seem to be uses in an 'interesting' way elsewhere..
+    for my $key( %$hash ) {
+        next unless defined $hash->{$key};
+        $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
+    }
+    $t->set_attributes( \%attr );
+    push @tokens, $t;
 }

 1;
--- a/misc/translator/TmplTokenizer.pm
+++ b/misc/translator/TmplTokenizer.pm
@ -305,21 +305,33 @@ sub next_token {
    # parts that make up a text_parametrized (future children of the token)
    my @parts = ();
    while(1){
+        # warn Dumper @parts;
        $next = $self->{_parser}->next_token;
-        return undef unless defined $next;
+        if (! $next){
+            if (@parts){
+                return $self->_parametrize_internal(@parts);
+            }
+            else {
+                return undef;
+            }
+        }
        # if cformat mode is off, dont bother parametrizing, just return them as they come
        return $next unless $self->allow_cformat_p;
        if( $next->type == TmplTokenType::TEXT ){
            push @parts, $next;
-        } elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
+        } 
+        elsif( $next->type == TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
            push @parts, $next;
-        } else{
+        } 
+        else {
            # if there is nothing in parts, return this token
-            return $next unless @parts;
+ 
+           return $next unless @parts;
            # OTHERWISE, put this token back and return the parametrized string of @parts
            $self->{_parser}->unshift_token($next);
            return $self->_parametrize_internal(@parts);
        }
+
    }
 }