misc/translator/TmplTokenizer.pm

   1 package TmplTokenizer;
   2
   3 use Modern::Perl;
   4 use C4::TmplTokenType;
   5 use C4::TmplToken;
   6 use C4::TTParser;
   7 use VerboseWarnings qw( pedantic_p error_normal warn_normal warn_pedantic );
   8 require Exporter;
   9
  10 ###############################################################################
  11
  12 =head1 NAME
  13
  14 TmplTokenizer.pm - Simple-minded wrapper class for TTParser
  15
  16 =head1 DESCRIPTION
  17
  18 A wrapper for the functionality found in TTParser to allow an easier transition to Template Toolkit
  19
  20 =cut
  21
  22 ###############################################################################
  23
  24
  25 use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
  26 use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
  27 use vars qw( $pedantic_error_markup_in_pcdata_p );
  28
  29 ###############################################################################
  30
  31 # Hideous stuff
  32 use vars qw( $re_xsl $re_end_entity $re_tmpl_var);
  33 BEGIN {
  34     $re_tmpl_var = q{\[%\s*[get|set|default]?\s*[\w\.]+\s*[|.*?]?\s*%\]};
  35     $re_xsl = q{<\/?(?:xsl:)(?:[\s\-a-zA-Z0-9"'\/\.\[\]\@\(\):=,$]+)\/?>};
  36     $re_end_entity = '(?:;|$|(?=\s))'; # semicolon or before-whitespace
  37 }
  38 # End of the hideous stuff
  39
  40 use vars qw( $serial );
  41
  42 ###############################################################################
  43
  44 sub FATAL_P             () {'fatal-p'}
  45 sub SYNTAXERROR_P       () {'syntaxerror-p'}
  46
  47 sub FILENAME            () {'input'}
  48 #sub HANDLE             () {'handle'}
  49
  50 #sub READAHEAD          () {'readahead'}
  51 sub LINENUM_START       () {'lc_0'}
  52 sub LINENUM             () {'lc'}
  53 sub CDATA_MODE_P        () {'cdata-mode-p'}
  54 sub CDATA_CLOSE         () {'cdata-close'}
  55 #sub PCDATA_MODE_P      () {'pcdata-mode-p'}    # additional submode for CDATA
  56 sub JS_MODE_P           () {'js-mode-p'}        # cdata-mode-p must also be true
  57
  58 sub ALLOW_CFORMAT_P     () {'allow-cformat-p'}
  59
  60 sub new {
  61     shift;
  62     my ($filename) = @_;
  63     #open my $handle,$filename or die "can't open $filename";
  64     my $parser = C4::TTParser->new;
  65     $parser->build_tokens( $filename );
  66     bless {
  67       filename => $filename,
  68       _parser => $parser
  69 #     , handle => $handle
  70 #     , readahead => []
  71     } , __PACKAGE__;
  72 }
  73
  74 ###############################################################################
  75
  76 # Simple getters
  77
  78 sub filename {
  79     my $this = shift;
  80     return $this->{filename};
  81 }
  82
  83 sub fatal_p {
  84     my $this = shift;
  85     return $this->{+FATAL_P};
  86 }
  87
  88 # work around, currently not implemented
  89 sub syntaxerror_p {
  90 #    my $this = shift;
  91 #    return $this->{+SYNTAXERROR_P};
  92     return 0;
  93 }
  94
  95 sub js_mode_p {
  96     my $this = shift;
  97     return $this->{+JS_MODE_P};
  98 }
  99
 100 sub allow_cformat_p {
 101     my $this = shift;
 102     return $this->{+ALLOW_CFORMAT_P};
 103 }
 104
 105 # Simple setters
 106
 107 sub _set_fatal {
 108     my $this = shift;
 109     $this->{+FATAL_P} = $_[0];
 110     return $this;
 111 }
 112
 113 sub _set_js_mode {
 114     my $this = shift;
 115     $this->{+JS_MODE_P} = $_[0];
 116     return $this;
 117 }
 118
 119 #used in xgettext, tmpl_process3
 120 sub set_allow_cformat {
 121     my $this = shift;
 122     $this->{+ALLOW_CFORMAT_P} = $_[0];
 123     return $this;
 124 }
 125
 126 ###############################################################################
 127
 128 use vars qw( $js_EscapeSequence );
 129 BEGIN {
 130     # Perl quoting is really screwed up, but this common subexp is way too long
 131     $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})};
 132 }
 133 sub parenleft  () { '(' }
 134 sub parenright () { ')' }
 135
 136 sub _split_js {
 137     my ($s0) = @_;
 138     my @it = ();
 139     while (length $s0) {
 140         if ($s0 =~ /^\s+/s) {                           # whitespace
 141           push @it, $&;
 142           $s0 = $';
 143         } elsif ($s0 =~ /^\/\/[^\r\n]*(?:[\r\n]|$)/s) { # C++-style comment
 144         push @it, $&;
 145         $s0 = $';
 146         } elsif ($s0 =~ /^\/\*(?:(?!\*\/).)*\*\//s) {   # C-style comment
 147             push @it, $&;
 148             $s0 = $';
 149         # Keyword or identifier, ECMA-262 p.13 (section 7.5)
 150         } elsif ($s0 =~ /^[A-Z_\$][A-Z\d_\$]*/is) {     # IdentifierName
 151             push @it, $&;
 152             $s0 = $';
 153         # Punctuator, ECMA-262 p.13 (section 7.6)
 154         } elsif ($s0 =~ /^(?:[\(\){}\[\];]|>>>=|<<=|>>=|[-\+\*\/\&\|\^\%]=|>>>|<<|>>|--|\+\+|\|\||\&\&|==|<=|>=|!=|[=><,!~\?:\.\-\+\*\/\&\|\^\%])/s) {
 155             push @it, $&;
 156             $s0 = $';
 157         # DecimalLiteral, ECMA-262 p.14 (section 7.7.3); note: bug in the spec
 158         } elsif ($s0 =~ /^(?:0|[1-9]\d+(?:\.\d*(?:[eE][-\+]?\d+)?)?)/s) {
 159             push @it, $&;
 160             $s0 = $';
 161         # HexIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
 162         } elsif ($s0 =~ /^0[xX][\da-fA-F]+/s) {
 163             push @it, $&;
 164             $s0 = $';
 165         # OctalIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
 166         } elsif ($s0 =~ /^0[\da-fA-F]+/s) {
 167             push @it, $&;
 168             $s0 = $';
 169         # StringLiteral, ECMA-262 p.17 (section 7.7.4)
 170         # XXX SourceCharacter doesn't seem to be defined (?)
 171         } elsif ($s0 =~ /^(?:"(?:(?!["\\\r\n]).|$js_EscapeSequence)*"|'(?:(?!['\\\r\n]).|$js_EscapeSequence)*')/os) {
 172             push @it, $&;
 173             $s0 = $';
 174         } elsif ($s0 =~ /^./) {                         # UNKNOWN TOKEN !!!
 175             push @it, $&;
 176             $s0 = $';
 177         }
 178     }
 179     return @it;
 180 }
 181
 182 sub STATE_UNDERSCORE     () { 1 }
 183 sub STATE_PARENLEFT      () { 2 }
 184 sub STATE_STRING_LITERAL () { 3 }
 185
 186 # XXX This is a crazy hack. I don't want to write an ECMAScript parser.
 187 # XXX A scanner is one thing; a parser another thing.
 188 sub _identify_js_translatables {
 189     my @input = @_;
 190     my @output = ();
 191     # We mark a JavaScript translatable string as in C, i.e., _("literal")
 192     # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
 193     for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
 194 #        warn $input[$i];
 195         my $reset_state_p = 0;
 196         push @output, [0, $input[$i]];
 197         if ($input[$i] !~ /\S/s) {
 198           ;
 199         } elsif ($state == 0) {
 200           $state = STATE_UNDERSCORE if $input[$i] eq '_';
 201         } elsif ($state == STATE_UNDERSCORE) {
 202           $state = $input[$i] eq parenleft ? STATE_PARENLEFT : 0;
 203         } elsif ($state == STATE_PARENLEFT) {
 204           if ($input[$i] =~ /^(['"])(.*)\1$/s) {
 205             ($state, $j, $q, $s) = (STATE_STRING_LITERAL, $#output, $1, $2);
 206           } else {
 207             $state = 0;
 208           }
 209         } elsif ($state == STATE_STRING_LITERAL) {
 210           if ($input[$i] eq parenright) {
 211             $output[$j] = [1, $output[$j]->[1], $q, $s];
 212           }
 213           $state = 0;
 214         } else {
 215           die "identify_js_translatables internal error: Unknown state $state"
 216         }
 217     }
 218 #    use Data::Dumper;
 219 #    warn Dumper \@output;
 220     return \@output;
 221 }
 222
 223 ###############################################################################
 224
 225 sub string_canon {
 226   my $s = shift;
 227   # Fold all whitespace into single blanks
 228   $s =~ s/\s+/ /g;
 229   $s =~ s/^\s+//g;
 230   return $s;
 231 }
 232
 233 # safer version used internally, preserves new lines
 234 sub string_canon_safe {
 235   my $s = shift;
 236   # fold tabs and spaces into single spaces
 237   $s =~ s/[\ \t]+/ /gs;
 238   return $s;
 239 }
 240
 241
 242 sub _quote_cformat{
 243   my $s = shift;
 244   $s =~ s/%/%%/g;
 245   return $s;
 246 }
 247
 248 sub _formalize_string_cformat{
 249   my $s = shift;
 250   return _quote_cformat( string_canon_safe($s) );
 251 }
 252
 253 sub _formalize{
 254   my $t = shift;
 255   if( $t->type == C4::TmplTokenType::DIRECTIVE ){
 256     return '%s';
 257   } elsif( $t->type == C4::TmplTokenType::TEXT ){
 258     return _formalize_string_cformat( $t->string );
 259   } elsif( $t->type == C4::TmplTokenType::TAG ){
 260     if( $t->string =~ m/^a\b/is ){
 261       return '<a>';
 262     } elsif( $t->string =~ m/^input\b/is ){
 263       if( lc $t->attributes->{'type'}->[1] eq 'text' ){
 264         return '%S';
 265       } else{
 266         return '%p';
 267       }
 268     } else{
 269       return _quote_cformat $t->string;
 270     }
 271   } else{
 272     return _quote_cformat $t->string;
 273   }
 274 }
 275
 276 # internal parametization, used within next_token
 277 # method that takes in an array of TEXT and DIRECTIVE tokens (DIRECTIVEs must be GET) and return a C4::TmplTokenType::TEXT_PARAMETRIZED
 278 sub _parametrize_internal{
 279     my $this = shift;
 280     my @parts = @_;
 281     # my $s = "";
 282     # for my $item (@parts){
 283     #     if( $item->type == C4::TmplTokenType::TEXT ){
 284     #         $s .= $item->string;
 285     #     } else {
 286     #         #must be a variable directive
 287     #         $s .= "%s";
 288     #     }
 289     # }
 290     my $s = join( "", map { _formalize $_ } @parts );
 291     # should both the string and form be $s? maybe only the later? posibly the former....
 292     # used line number from first token, should suffice
 293     my $t = C4::TmplToken->new( $s, C4::TmplTokenType::TEXT_PARAMETRIZED, $parts[0]->line_number, $this->filename );
 294     $t->set_children(@parts);
 295     $t->set_form($s);
 296     return $t;
 297 }
 298
 299 sub next_token {
 300     my $self = shift;
 301     my $next;
 302 #    warn "in next_token";
 303     # parts that make up a text_parametrized (future children of the token)
 304     my @parts = ();
 305     while(1){
 306         $next = $self->{_parser}->next_token;
 307         if (! $next){
 308             if (@parts){
 309                 return $self->_parametrize_internal(@parts);
 310             }
 311             else {
 312                 return;
 313             }
 314         }
 315         # if cformat mode is off, dont bother parametrizing, just return them as they come
 316         return $next unless $self->allow_cformat_p;
 317         if( $next->type == C4::TmplTokenType::TEXT ){
 318             push @parts, $next;
 319         }
 320 #        elsif( $next->type == C4::TmplTokenType::DIRECTIVE && $next->string =~ m/\[%\s*\w+\s*%\]/ ){
 321         elsif( $next->type == C4::TmplTokenType::DIRECTIVE ){
 322             push @parts, $next;
 323         }
 324         elsif ( $next->type == C4::TmplTokenType::CDATA){
 325             $self->_set_js_mode(1);
 326             my $s0 = $next->string;
 327             my @head = ();
 328             my @tail = ();
 329
 330             if ($s0 =~ /^(\s*\[%\s*)(.*)(\s%=]\s*)$/s) {
 331                 push @head, $1;
 332                  push @tail, $3;
 333                 $s0 = $2;
 334             }
 335             push @head, _split_js($s0);
 336             $next->set_js_data(_identify_js_translatables(@head, @tail) );
 337             return $next unless @parts;
 338             $self->{_parser}->unshift_token($next);
 339             return $self->_parametrize_internal(@parts);
 340         }
 341         else {
 342             # if there is nothing in parts, return this token
 343             return $next unless @parts;
 344
 345             # OTHERWISE, put this token back and return the parametrized string of @parts
 346             $self->{_parser}->unshift_token($next);
 347             return $self->_parametrize_internal(@parts);
 348         }
 349
 350     }
 351 }
 352
 353 ###############################################################################
 354
 355 # function taken from old version
 356 # used by tmpl_process3
 357 sub parametrize {
 358     my($fmt_0, $cformat_p, $t, $f) = @_;
 359     my $it = '';
 360     if ($cformat_p) {
 361         my @params = $t->parameters_and_fields;
 362         for (my $n = 0, my $fmt = $fmt_0; length $fmt;) {
 363             if ($fmt =~ /^[^%]+/) {
 364                 $fmt = $';
 365                 $it .= $&;
 366             } elsif ($fmt =~ /^%%/) {
 367                 $fmt = $';
 368                 $it .= '%';
 369             } elsif ($fmt =~ /^%(?:(\d+)\$)?(?:(\d+)(?:\.(\d+))?)?s/s) {
 370                 $n += 1;
 371                 my($i, $width, $prec) = ((defined $1? $1: $n), $2, $3);
 372                 $fmt = $';
 373                 if (defined $width && defined $prec && !$width && !$prec) {
 374                     ;
 375                 } elsif (defined $params[$i - 1]) {
 376                     my $param = $params[$i - 1];
 377             warn_normal("$fmt_0: $&: Expected a TMPL_VAR, but found a "
 378                 . $param->type->to_string . "\n", undef)
 379                 if $param->type != C4::TmplTokenType::DIRECTIVE;
 380             warn_normal("$fmt_0: $&: Unsupported "
 381                 . "field width or precision\n", undef)
 382                             if defined $width || defined $prec;
 383             warn_normal("$fmt_0: $&: Parameter $i not known", undef)
 384                             unless defined $param;
 385                     $it .= defined $f? &$f( $param ): $param->string;
 386                 }
 387             } elsif ($fmt =~ /^%(?:(\d+)\$)?(?:(\d+)(?:\.(\d+))?)?([pS])/s) {
 388                 $n += 1;
 389                 my($i, $width, $prec, $conv) = ((defined $1? $1: $n), $2, $3, $4);
 390                 $fmt = $';
 391
 392                 my $param = $params[$i - 1];
 393                 if (!defined $param) {
 394             warn_normal("$fmt_0: $&: Parameter $i not known", undef);
 395                 } else {
 396                     if ($param->type == C4::TmplTokenType::TAG
 397                             && $param->string =~ /^<input\b/is) {
 398                         my $type = defined $param->attributes?
 399                                 lc($param->attributes->{'type'}->[1]): undef;
 400                         if ($conv eq 'S') {
 401                 warn_normal("$fmt_0: $&: Expected type=text, "
 402                     . "but found type=$type", undef)
 403                                     unless $type eq 'text';
 404                         } elsif ($conv eq 'p') {
 405                 warn_normal("$fmt_0: $&: Expected type=radio, "
 406                     . "but found type=$type", undef)
 407                                     unless $type eq 'radio';
 408                         }
 409                     } else {
 410                 warn_normal("$&: Expected an INPUT, but found a "
 411                     . $param->type->to_string . "\n", undef)
 412                     }
 413             warn_normal("$fmt_0: $&: Unsupported "
 414                 . "field width or precision\n", undef)
 415                             if defined $width || defined $prec;
 416                     $it .= defined $f? &$f( $param ): $param->string;
 417                 }
 418             } elsif ($fmt =~ /^%[^%a-zA-Z]*[a-zA-Z]/) {
 419                 $fmt = $';
 420                 $it .= $&;
 421                 die "$&: Unknown or unsupported format specification\n"; #XXX
 422             } else {
 423                 die "$&: Completely confused parametrizing -- msgid: $fmt_0\n";#XXX
 424             }
 425         }
 426     }
 427     my @anchors = $t->anchors;
 428     for (my $n = 0, my $fmt = $it, $it = ''; length $fmt;) {
 429         if ($fmt =~ /^(?:(?!<a\d+>).)+/is) {
 430             $fmt = $';
 431             $it .= $&;
 432         } elsif ($fmt =~ /^<a(\d+)>/is) {
 433             $n += 1;
 434             my $i  = $1;
 435             $fmt = $';
 436             my $anchor = $anchors[$i - 1];
 437         warn_normal("$&: Anchor $1 not found for msgid \"$fmt_0\"", undef) #FIXME
 438                     unless defined $anchor;
 439             $it .= $anchor->string;
 440         } else {
 441             die "Completely confused decoding anchors: $fmt\n";#XXX
 442         }
 443     }
 444     return $it;
 445 }
 446
 447
 448 # Other simple functions (These are not methods)
 449
 450 sub blank_p {
 451     my($s) = @_;
 452     return $s =~ /^(?:\s|\&nbsp$re_end_entity|$re_tmpl_var|$re_xsl)*$/osi;
 453 }
 454
 455 sub trim {
 456     my($s0) = @_;
 457     my $l0 = length $s0;
 458     my $s = $s0;
 459     $s =~ s/^(\s|\&nbsp$re_end_entity)+//os; my $l1 = $l0 - length $s;
 460     $s =~ s/(\s|\&nbsp$re_end_entity)+$//os; my $l2 = $l0 - $l1 - length $s;
 461     return wantarray? (substr($s0, 0, $l1), $s, substr($s0, $l0 - $l2)): $s;
 462 }
 463
 464 sub quote_po {
 465     my($s) = @_;
 466     # Locale::PO->quote is buggy, it doesn't quote newlines :-/
 467     $s =~ s/([\\"])/\\$1/gs;
 468     $s =~ s/\n/\\n/g;
 469     #$s =~ s/[\177-\377]/ sprintf("\\%03o", ord($&)) /egs;
 470     return "\"$s\"";
 471 }
 472
 473 sub charset_canon {
 474     my($charset) = @_;
 475     $charset = uc($charset);
 476     $charset = "$1-$2" if $charset =~ /^(ISO|UTF)(\d.*)/i;
 477     $charset = 'Big5' if $charset eq 'BIG5'; # "Big5" must be in mixed case
 478     return $charset;
 479 }
 480
 481 use vars qw( @latin1_utf8 );
 482 @latin1_utf8 = (
 483     "\302\200", "\302\201", "\302\202", "\302\203", "\302\204", "\302\205",
 484     "\302\206", "\302\207", "\302\210", "\302\211", "\302\212", "\302\213",
 485     "\302\214", "\302\215",   undef,      undef,    "\302\220", "\302\221",
 486     "\302\222", "\302\223", "\302\224", "\302\225", "\302\226", "\302\227",
 487     "\302\230", "\302\231", "\302\232", "\302\233", "\302\234", "\302\235",
 488     "\302\236", "\302\237", "\302\240", "\302\241", "\302\242", "\302\243",
 489     "\302\244", "\302\245", "\302\246", "\302\247", "\302\250", "\302\251",
 490     "\302\252", "\302\253", "\302\254", "\302\255", "\302\256", "\302\257",
 491     "\302\260", "\302\261", "\302\262", "\302\263", "\302\264", "\302\265",
 492     "\302\266", "\302\267", "\302\270", "\302\271", "\302\272", "\302\273",
 493     "\302\274", "\302\275", "\302\276", "\302\277", "\303\200", "\303\201",
 494     "\303\202", "\303\203", "\303\204", "\303\205", "\303\206", "\303\207",
 495     "\303\210", "\303\211", "\303\212", "\303\213", "\303\214", "\303\215",
 496     "\303\216", "\303\217", "\303\220", "\303\221", "\303\222", "\303\223",
 497     "\303\224", "\303\225", "\303\226", "\303\227", "\303\230", "\303\231",
 498     "\303\232", "\303\233", "\303\234", "\303\235", "\303\236", "\303\237",
 499     "\303\240", "\303\241", "\303\242", "\303\243", "\303\244", "\303\245",
 500     "\303\246", "\303\247", "\303\250", "\303\251", "\303\252", "\303\253",
 501     "\303\254", "\303\255", "\303\256", "\303\257", "\303\260", "\303\261",
 502     "\303\262", "\303\263", "\303\264", "\303\265", "\303\266", "\303\267",
 503     "\303\270", "\303\271", "\303\272", "\303\273", "\303\274", "\303\275",
 504     "\303\276", "\303\277" );
 505
 506 sub charset_convert {
 507     my($s, $charset_in, $charset_out) = @_;
 508     if ($s !~ /[\200-\377]/s) { # FIXME: don't worry about iso2022 for now
 509         ;
 510     } elsif ($charset_in eq 'ISO-8859-1' && $charset_out eq 'UTF-8') {
 511         $s =~ s/[\200-\377]/ $latin1_utf8[ord($&) - 128] /egs;
 512     } elsif ($charset_in ne $charset_out) {
 513         VerboseWarnings::warn_normal "conversion from $charset_in to $charset_out is not supported\n", undef;
 514     }
 515     return $s;
 516 }
 517
 518 ###############################################################################
 519
 520 =pod
 521
 522 In addition to the basic scanning, this class will also perform
 523 the following:
 524
 525 =over
 526
 527 =item -
 528
 529 Emulation of c-format strings (see below)
 530
 531 =item -
 532
 533 Display of warnings for certain things that affects either the
 534 ability of this class to yield correct output, or things that
 535 are known to cause the original template to cause trouble.
 536
 537 =item -
 538
 539 Automatic correction of some of the things warned about
 540 (e.g., SGML "closed start tag" notation).
 541
 542 =back
 543
 544 =head2 c-format strings emulation
 545
 546 Because English word order is not universal, a simple extraction
 547 of translatable strings may yield some strings like "Accounts for"
 548 or ambiguous strings like "in". This makes the resulting strings
 549 difficult to translate, but does not affect all languages alike.
 550 For example, Chinese (with a somewhat different word order) would
 551 be hit harder, but French would be relatively unaffected.
 552
 553 To overcome this problem, the scanner can be configured to detect
 554 patterns with <TMPL_VAR> directives (as well as certain HTML tags),
 555 and try to construct a larger pattern that will appear in the PO
 556 file as c-format strings with %s placeholders. This additional
 557 step allows the translator to deal with cases where word order
 558 is different (replacing %s with %1$s, %2$s, etc.), or when certain
 559 words will require certain inflectional suffixes in sentences.
 560
 561 Because this is an incompatible change, this mode must be explicitly
 562 turned on using the set_allow_cformat(1) method call.
 563
 564 =head2 The flag characters
 565
 566 The character % is followed by zero or more of the following flags:
 567
 568 =over
 569
 570 =item #
 571
 572 The value comes from HTML <INPUT> elements.
 573 This abuse of the flag character is somewhat reasonable,
 574 since TMPL_VAR and INPUT are both variables, but of different kinds.
 575
 576 =back
 577
 578 =head2 The field width and precision
 579
 580 An optional 0.0 can be specified for %s to specify
 581 that the <TMPL_VAR> should be suppressed.
 582
 583 =head2 The conversion specifier
 584
 585 =over
 586
 587 =item p
 588
 589 Specifies any input field that is neither text nor hidden
 590 (which currently mean radio buttons).
 591 The p conversion specifier is chosen because this does not
 592 evoke any certain sensible data type.
 593
 594 =item S
 595
 596 Specifies a text input field (<INPUT TYPE=TEXT>).
 597 This use of the S conversion specifier is somewhat reasonable,
 598 since text input fields contain values of undeterminable type,
 599 which can be treated as strings.
 600
 601 =item s
 602
 603 Specifies a <TMPL_VAR>.
 604 This use of the o conversion specifier is somewhat reasonable,
 605 since <TMPL_VAR> denotes values of undeterminable type, which
 606 can be treated as strings.
 607
 608 =back
 609
 610 =head1 BUGS
 611
 612 There is no code to save the tag name anywhere in the scanned token.
 613
 614 The use of <AI<i>> to stand for the I<i>th anchor
 615 is not very well thought out.
 616 Some abuse of c-format specifies might have been more appropriate.
 617
 618 =head1 HISTORY
 619
 620 This tokenizer is mostly based
 621 on Ambrose's hideous Perl script known as subst.pl.
 622
 623 =cut
 624
 625 1;