From f8fecb78634a2f075ef7db9bb367fc4c52f48bfe Mon Sep 17 00:00:00 2001 From: Joe Atzberger Date: Thu, 8 May 2008 01:06:44 -0500 Subject: [PATCH] Wrapper for Koha's use of HTML::Scrubber, with test script on usage. Signed-off-by: Joshua Ferraro --- C4/Scrubber.pm | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++ t/Scrubber.t | 68 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 C4/Scrubber.pm create mode 100755 t/Scrubber.t diff --git a/C4/Scrubber.pm b/C4/Scrubber.pm new file mode 100644 index 0000000000..e8e4023710 --- /dev/null +++ b/C4/Scrubber.pm @@ -0,0 +1,83 @@ +package C4::Scrubber; +# This file is part of Koha. +# +# Koha is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# Koha is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, +# Suite 330, Boston, MA 02111-1307 USA + +use strict; +use warnings; +use Carp; +use HTML::Scrubber; + +use C4::Context; +use C4::Debug; + +use vars qw($VERSION @ISA); +use vars qw(%scrubbertypes $scrubbertype); + +BEGIN { + $VERSION = 0.01; + # @ISA = qw(HTML::Scrubber); +} + +INIT { + %scrubbertypes = ( + default => {}, # place holder, default settings are below as fallbacks in call to constructor + tag => {}, # uses defaults + comment => { + allow => [qw( br b i em big small )], + }, + staff => { + default => [ 1 =>{'*'=>1} ], + comment => 1, + }, + ); +} + + +sub new { + my $fakeself = shift; # not really OO, we return an HTML::Scrubber object. + my $type = (@_) ? shift : 'default'; + exists $scrubbertypes{$type} or croak "New called with unrecognized type '$type'"; + $debug and print STDERR "Building new Scrubber of type '$type'\n"; + my $settings = $scrubbertypes{$type}; + my $scrubber = HTML::Scrubber->new( + allow => exists $settings->{allow} ? $settings->{allow} : [], + rules => exists $settings->{rules} ? $settings->{rules} : [], + default => exists $settings->{default} ? $settings->{default} : [ 0 =>{'*'=>0} ], + comment => exists $settings->{comment} ? $settings->{comment} : 0, + process => 0, + ); + return $scrubber; +} + + +1; +__END__ + +=head1 C4::Sanitize + +Standardized wrapper with settings for building HTML::Scrubber tailored to various koha inputs. +More verbose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}. + +The default is to scrub everything, leaving no markup at all. This is compatible with the expectations +for Tags. + +=head2 + +=head3 TO DO: Add real perldoc + +=head2 + +=cut + diff --git a/t/Scrubber.t b/t/Scrubber.t new file mode 100755 index 0000000000..218ecdfe56 --- /dev/null +++ b/t/Scrubber.t @@ -0,0 +1,68 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +use Test::More tests => 10; +BEGIN { + use FindBin; + use lib $FindBin::Bin; + use override_context_prefs; + use_ok('C4::Scrubber'); +} + +sub pretty_line { + my $max = 54; + (@_) or return "#" x $max . "\n"; + my $phrase = " " . shift() . " "; + my $half = "#" x (($max - length($phrase))/2); + return $half . $phrase . $half . "\n"; +} + +my ($scrubber,$html,$result,@types,$collapse); +$collapse = 1; +@types = qw(comment tag); +$html = q| + + +
+ + + I am ITALICS with fake="attribute"
+ I am em with fake="attribute"
+ I am BOLD
+ I am a span w/ style. Bad style. + I am a span trying to inject a link: <a href="badlink.html"> link <a> +
+ + I am a link firing javascript. +
+ + ONMOUSEOVER JAVASCRIPT + +
+At the end here, I actually have some regular text. +|; + +print pretty_line("Original HTML:"), $html, "\n", pretty_line(); +$collapse and diag "Note: scrubber test output will have whitespace collapsed for readability\n"; +ok($scrubber = C4::Scrubber->new(), "Constructor: C4::Scrubber->new()"); +ok(printf("# scrubber settings: default %s, comment %s, process %s\n", + $scrubber->default(),$scrubber->comment(),$scrubber->process()), + "Outputting settings from scrubber object (type: [default])" +); +ok($result = $scrubber->scrub($html), "Getting scrubbed text (type: [default])"); +$collapse and $result =~ s/\s*\n\s*/\n/g; +print pretty_line('default'), $result, "\n", pretty_line(); + +foreach(@types) { + ok($scrubber = C4::Scrubber->new($_), "Constructor: C4::Scrubber->new($_)"); + ok(printf("# scrubber settings: default %s, comment %s, process %s\n", + $scrubber->default(),$scrubber->comment(),$scrubber->process()), + "Outputting settings from scrubber object (type: $_)" + ); + ok($result = $scrubber->scrub($html), "Getting scrubbed text (type: $_)"); + $collapse and $result =~ s/\s*\n\s*/\n/g; + print pretty_line($_), $result, "\n", pretty_line(); +} +diag "done.\n"; -- 2.39.5