David Cook
e692b40523
Signed-off-by: Joonas Kylmälä <joonas.kylmala@helsinki.fi> Signed-off-by: Katrin Fischer <katrin.fischer.83@web.de> Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>
180 lines
6.2 KiB
Perl
180 lines
6.2 KiB
Perl
# Copyright Tamil 2011
|
|
#
|
|
# This file is part of Koha.
|
|
#
|
|
# Koha is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Koha is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Koha; if not, see <http://www.gnu.org/licenses>.
|
|
|
|
#simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
|
|
package C4::TTParser;
|
|
use base qw(HTML::Parser);
|
|
use C4::TmplToken;
|
|
use strict;
|
|
use warnings;
|
|
|
|
#seems to be handled post tokenizer
|
|
##hash where key is tag we are interested in and the value is a hash of the attributes we want
|
|
#my %interesting_tags = (
|
|
# img => { alt => 1 },
|
|
#);
|
|
|
|
#tokens found so far (used like a stack)
|
|
my ( @tokens );
|
|
|
|
#shiftnext token or undef
|
|
sub next_token{
|
|
return shift @tokens;
|
|
}
|
|
|
|
#unshift token back on @tokens
|
|
sub unshift_token{
|
|
my $self = shift;
|
|
unshift @tokens, shift;
|
|
}
|
|
|
|
#have a peep at next token
|
|
sub peep_token{
|
|
return $tokens[0];
|
|
}
|
|
|
|
#wrapper for parse
|
|
#please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
|
|
#signature build_tokens( self, filename)
|
|
sub build_tokens{
|
|
my ($self, $filename) = @_;
|
|
$self->{filename} = $filename;
|
|
$self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, original text )
|
|
$self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, original text, is_cdata )
|
|
$self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, original text )
|
|
$self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
|
|
$self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
|
|
$self->handler(process => "process", "self, line, text, is_cdata"); # processing statement <?...?>
|
|
# $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
|
|
$self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
|
|
$self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
|
|
open(my $fh, "<:encoding(utf8)", $filename) || die "Cannot open $filename ($!)";
|
|
$self->parse_file($fh);
|
|
return $self;
|
|
}
|
|
|
|
#handle parsing of text
|
|
sub text{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; # original text
|
|
my $is_cdata = shift;
|
|
while($work){
|
|
# if there is a template_toolkit tag
|
|
if( $work =~ m/\[%.*?%\]/ ){
|
|
#everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
|
|
if( $` ){
|
|
my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
#the match itself is a DIRECTIVE $&
|
|
my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
|
|
# put work still to do back into work
|
|
$work = $' ? $' : 0;
|
|
} else {
|
|
# If there is some left over work, treat it as text token
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
|
|
push @tokens, $t;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub declaration {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
sub comment {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
sub process {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
sub default {
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $work = shift; #original text
|
|
my $is_cdata = shift;
|
|
my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
|
|
#handle opening html tags
|
|
sub start{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $tag = shift;
|
|
my $hash = shift; #hash of attr/value pairs
|
|
my $text = shift; #original text
|
|
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
|
|
my %attr;
|
|
# tags seem to be uses in an 'interesting' way elsewhere..
|
|
for my $key( %$hash ) {
|
|
next unless defined $hash->{$key};
|
|
if ($key eq "/"){
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
|
|
}
|
|
else {
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
|
|
}
|
|
}
|
|
$t->set_attributes( \%attr );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
#handle closing html tags
|
|
sub end{
|
|
my $self = shift;
|
|
my $line = shift;
|
|
my $tag = shift;
|
|
my $hash = shift;
|
|
my $text = shift;
|
|
# what format should this be in?
|
|
my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
|
|
my %attr;
|
|
# tags seem to be uses in an 'interesting' way elsewhere..
|
|
for my $key( %$hash ) {
|
|
next unless defined $hash->{$key};
|
|
$attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
|
|
}
|
|
$t->set_attributes( \%attr );
|
|
push @tokens, $t;
|
|
}
|
|
|
|
1;
|