summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
Diffstat (limited to 't')
-rw-r--r--t/00-load.t125
-rw-r--r--t/attribute_constraints.t66
-rw-r--r--t/author-pod-coverage.t15
-rw-r--r--t/author-pod-spell.t74
-rw-r--r--t/author-synopsis.t13
-rw-r--r--t/author-tidyall.t19
-rw-r--r--t/comments.t33
-rw-r--r--t/declaration.t22
-rw-r--r--t/js.t24
-rw-r--r--t/lowercase.t29
-rw-r--r--t/memory-leak.t24
-rw-r--r--t/perlcriticrc4
-rw-r--r--t/pod.t12
-rw-r--r--t/release-cpan-changes.t18
-rw-r--r--t/replace_img.t62
-rw-r--r--t/scheme.t70
-rw-r--r--t/stack.t19
-rw-r--r--t/style.t24
-rwxr-xr-xt/xss.t35
19 files changed, 688 insertions, 0 deletions
diff --git a/t/00-load.t b/t/00-load.t
new file mode 100644
index 0000000..1c1421c
--- /dev/null
+++ b/t/00-load.t
@@ -0,0 +1,125 @@
+#!perl
+
+use Test::More;
+
+use strict;
+use warnings;
+
+use Data::Dump;
+use HTML::Restrict;
+use Scalar::Util;
+
+my $version = $HTML::Restrict::VERSION || 'development';
+diag( "Testing HTML::Restrict $version, Perl $], $^X" );
+
+my $hr = HTML::Restrict->new( debug => 0 );
+isa_ok( $hr, 'HTML::Restrict' );
+
+isa_ok( $hr->parser, 'HTML::Parser' );
+
+my $default_rules = $hr->get_rules;
+
+cmp_ok( Scalar::Util::reftype( $default_rules ),
+ 'eq', 'HASH', "default rules are empty" );
+
+# basic stripping -- all tags strippped
+my $bold = '<b>i am bold</b>';
+my $processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', 'i am bold', "b tag stripped" );
+
+# updating rules
+my $b_rules = { b => [] };
+$hr->set_rules( $b_rules );
+my $updated_rules = $hr->get_rules;
+is_deeply( $b_rules, $updated_rules, "rules update correctly" );
+
+# ensure allowed tags aren't stripped
+$processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', $bold, "b tag not stripped" );
+
+# more complex set with multiple tags
+# ensure allowed tags aren't stripped and others are removed
+$hr->set_rules( { a => [qw( href target )] } );
+my $link
+ = q[<center><a href="http://google.com" target="_blank" id="test">google</a></center>];
+my $processed_link = $hr->process( $link );
+cmp_ok(
+ $processed_link, 'eq',
+ q[<a href="http://google.com" target="_blank">google</a>],
+ "allowed link but not center tag",
+);
+
+# ensure closing slash is maintained for tags
+# with no end tag
+$hr->set_rules( { img => [qw( src width height /)] } );
+my $img = q[<body><img src="/face.jpg" width="10" height="10" /></body>];
+my $processed_img = $hr->process( $img );
+cmp_ok(
+ $processed_img, 'eq',
+ '<img src="/face.jpg" width="10" height="10" />',
+ "closing slash preserved in image"
+);
+
+# rest rules to default set
+$hr->set_rules( {} );
+cmp_ok( $hr->process( $bold ), 'eq', 'i am bold', "back to default rules" );
+
+# stripping of comments
+cmp_ok( $hr->process( "<!-- comment this -->ok" ),
+ 'eq', 'ok', "comments are stripped" );
+
+# stripping of javascript includes
+cmp_ok(
+ $hr->process(
+ q{<script type="text/javascript" src="/js/jquery-1.3.2.js"></script>ok}
+ ),
+ 'eq', 'ok',
+ "javascript includes are stripped"
+);
+
+# stripping of css includes
+cmp_ok(
+ $hr->process(
+ q{<link href="/style.css" media="screen" rel="stylesheet" type="text/css" />ok}
+ ),
+ 'eq', 'ok',
+ "css includes are stripped"
+);
+
+ok( $hr->trim, "trim enabled by default" );
+
+# stripping of leading and trailing spaces
+cmp_ok( $hr->process( " ok ok ok " ),
+ 'eq', 'ok ok ok', "leading and trailing spaces trimmed" );
+
+# stripping of div tags
+cmp_ok( $hr->process( "<div>ok</div>" ),
+ 'eq', 'ok', "divs are stripped away" );
+
+# undef should be returned when no value is passed to the process method
+is( $hr->process(), undef, "undef is returned when no value passed" );
+
+
+# start fresh
+# RT #55775
+$hr = HTML::Restrict->new;
+
+cmp_ok( $hr->process( 0 ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '0' ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '000' ), 'eq', '000', "untrue values not processed");
+
+ok( !$hr->process("<html>"), "process only HTML" );
+
+
+# bugfix: check URI scheme for "src" attributes
+$hr = HTML::Restrict->new( rules => { img => [qw( src )] } );
+$hr->set_uri_schemes( [ undef, 'http', 'https' ] );
+cmp_ok( $hr->process('<img src="file:/some/file">'), 'eq', '<img>' );
+
+# bugfix: ensure stripper stack is reset in case of broken html
+$hr = HTML::Restrict->new;
+$hr->strip_enclosed_content( ['script'] );
+$hr->process('<script < b >');
+cmp_ok($hr->process('some text'), 'eq', 'some text', "stripper stack reset");
+
+done_testing();
diff --git a/t/attribute_constraints.t b/t/attribute_constraints.t
new file mode 100644
index 0000000..1f58bc8
--- /dev/null
+++ b/t/attribute_constraints.t
@@ -0,0 +1,66 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ qw( width height ),
+ {
+ src => qr{^http://www\.youtube\.com},
+ frameborder => qr{^(0|1)$},
+ }
+ ],
+ },
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>',
+ 'all constraints pass',
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="0"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315" frameborder="0"></iframe>',
+ 'one constraint fails',
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="A"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315"></iframe>',
+ 'two constraints fail',
+);
+
+$hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ { src => qr{^http://www\.youtube\.com} },
+ { frameborder => qr{^(0|1)$} },
+ { height => qr{^315$} },
+ { width => qr{^560$} },
+ ],
+ },
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+ ),
+ 'eq',
+ '<iframe src="http://www.youtube.com/embed/9gKeRZM2Iyc" frameborder="0" height="315" width="560"></iframe>',
+ 'possible to maintain order',
+);
+
+done_testing;
diff --git a/t/author-pod-coverage.t b/t/author-pod-coverage.t
new file mode 100644
index 0000000..243340f
--- /dev/null
+++ b/t/author-pod-coverage.t
@@ -0,0 +1,15 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests.
+
+use Test::Pod::Coverage 1.08;
+use Pod::Coverage::TrustPod;
+
+all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' });
diff --git a/t/author-pod-spell.t b/t/author-pod-spell.t
new file mode 100644
index 0000000..8b32a99
--- /dev/null
+++ b/t/author-pod-spell.t
@@ -0,0 +1,74 @@
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+use strict;
+use warnings;
+use Test::More;
+
+# generated by Dist::Zilla::Plugin::Test::PodSpelling 2.007004
+use Test::Spelling 0.12;
+use Pod::Wordlist;
+
+
+add_stopwords(<DATA>);
+all_pod_files_spelling_ok( qw( bin lib ) );
+__DATA__
+Alders
+Alders'
+Arthur
+Axel
+Ben
+Bullock
+Carwyn
+DOCTYPE
+Dagfinn
+David
+Elliott
+Etheridge
+Fitz
+Forsyth
+Golden
+Graham
+HTML
+Ilmari
+Jubenville
+Karen
+Knop
+Mannsåker
+Mark
+Olaf
+Raybec
+Restrict
+Schmidt
+TerMarsch
+XSS
+benkasminbullock
+bolded
+calyx238
+dagolden
+ether
+fREW
+fitz
+frioux
+graham
+haarg
+href
+img
+ioncache
+lib
+mark
+olaf
+param
+params
+perlpong
+sam
+schemas
+skaufman
+src
+whitelisted
+whitelisting
diff --git a/t/author-synopsis.t b/t/author-synopsis.t
new file mode 100644
index 0000000..5d1d4a7
--- /dev/null
+++ b/t/author-synopsis.t
@@ -0,0 +1,13 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+
+use Test::Synopsis;
+
+all_synopsis_ok();
diff --git a/t/author-tidyall.t b/t/author-tidyall.t
new file mode 100644
index 0000000..f161a3e
--- /dev/null
+++ b/t/author-tidyall.t
@@ -0,0 +1,19 @@
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::Test::TidyAll v$VERSION
+
+use Test::More 0.88;
+use Test::Code::TidyAll 0.24;
+
+tidyall_ok(
+ verbose => ( exists $ENV{TEST_TIDYALL_VERBOSE} ? $ENV{TEST_TIDYALL_VERBOSE} : 0 ),
+ jobs => ( exists $ENV{TEST_TIDYALL_JOBS} ? $ENV{TEST_TIDYALL_JOBS} : 1 ),
+);
+
+done_testing;
diff --git a/t/comments.t b/t/comments.t
new file mode 100644
index 0000000..2ceccbb
--- /dev/null
+++ b/t/comments.t
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!-- comment here -->stuff';
+$hr->debug(0);
+
+is $hr->process($text), 'stuff', 'comments allowed';
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$text = 'before<!-- This is a comment -- -- So is this -->after';
+$hr->allow_comments(0);
+
+is $hr->process($text), 'beforeafter', 'comment allowed';
+
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$hr->allow_comments(0);
+$text = '<!-- <script> <h1> -->';
+is $hr->process($text), undef, 'tags nested in comments removed';
+
+#$hr->set_rules({ script => [], 'h1' => [] });
+#is $hr->process( $text ), $text, 'tags nested in comments not removed when explicitly allowed';
+
+done_testing();
diff --git a/t/declaration.t b/t/declaration.t
new file mode 100644
index 0000000..2d5e9bf
--- /dev/null
+++ b/t/declaration.t
@@ -0,0 +1,22 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!DOCTYPE HTML> ';
+$hr->debug(1);
+
+is $hr->process($text), '', 'declaration not preserved';
+$hr->allow_declaration(1);
+is $hr->process($text), '<!DOCTYPE HTML>', 'declaration is preserved';
+
+$text
+ = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
+is $hr->process($text), $text, 'declaration preserved';
+
+done_testing();
diff --git a/t/js.t b/t/js.t
new file mode 100644
index 0000000..6d25754
--- /dev/null
+++ b/t/js.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<script type="text/javascript">
+$(document).ready(function() {
+ $('a.gallery').fancybox();
+});
+</script>];
+
+is( $hr->process($html), undef, "content of script tags removed by default" );
+
+$hr->set_rules( { script => ['type'] } );
+
+is( $hr->process($html), $html, "content of script preserved" );
+
+done_testing();
diff --git a/t/lowercase.t b/t/lowercase.t
new file mode 100644
index 0000000..7284884
--- /dev/null
+++ b/t/lowercase.t
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Fatal;
+use HTML::Restrict;
+
+my $html
+ = q[<!doctype html><!-- comments go here --><body onLoad="stuff">foo</body>];
+
+like(
+ exception {
+ my $hr = HTML::Restrict->new( rules => { Body => ['onload'] } );
+ },
+ qr{tag names must be lower cased},
+ "dies on mixed case tag names",
+);
+
+like(
+ exception {
+ my $hr = HTML::Restrict->new( rules => { body => ['onLoad'] } );
+ },
+ qr{attribute names must be lower cased},
+ "dies on mixed case attributes",
+);
+
+done_testing();
diff --git a/t/memory-leak.t b/t/memory-leak.t
new file mode 100644
index 0000000..7111746
--- /dev/null
+++ b/t/memory-leak.t
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+use Scalar::Util qw(weaken);
+
+# Ensure that we don't have any circular references between the HTML::Restrict
+# object and its parser.
+my $hr = HTML::Restrict->new;
+my $p = $hr->parser;
+
+my $weak_hr = $hr;
+my $weak_p = $p;
+weaken($weak_hr);
+weaken($weak_p);
+
+undef $hr;
+undef $p;
+
+ok !defined $weak_hr, 'HTML::Restrict freed; no circular reference.';
+ok !defined $weak_p, 'HTML::Parser freed; no circular reference.';
+
+done_testing();
diff --git a/t/perlcriticrc b/t/perlcriticrc
new file mode 100644
index 0000000..d6574dd
--- /dev/null
+++ b/t/perlcriticrc
@@ -0,0 +1,4 @@
+severity = 3
+[-Subroutines::RequireArgUnpacking]
+[-ControlStructures::ProhibitPostfixControls]
+[-Miscellanea::RequireRcsKeywords]
diff --git a/t/pod.t b/t/pod.t
new file mode 100644
index 0000000..ee8b18a
--- /dev/null
+++ b/t/pod.t
@@ -0,0 +1,12 @@
+#!perl -T
+
+use strict;
+use warnings;
+use Test::More;
+
+# Ensure a recent version of Test::Pod
+my $min_tp = 1.22;
+eval "use Test::Pod $min_tp";
+plan skip_all => "Test::Pod $min_tp required for testing POD" if $@;
+
+all_pod_files_ok();
diff --git a/t/release-cpan-changes.t b/t/release-cpan-changes.t
new file mode 100644
index 0000000..08331d3
--- /dev/null
+++ b/t/release-cpan-changes.t
@@ -0,0 +1,18 @@
+
+BEGIN {
+ unless ($ENV{RELEASE_TESTING}) {
+ print qq{1..0 # SKIP these tests are for release candidate testing\n};
+ exit
+ }
+}
+
+use strict;
+use warnings;
+
+# this test was generated with Dist::Zilla::Plugin::Test::CPAN::Changes 0.012
+
+use Test::More 0.96 tests => 1;
+use Test::CPAN::Changes;
+subtest 'changes_ok' => sub {
+ changes_file_ok('Changes');
+};
diff --git a/t/replace_img.t b/t/replace_img.t
new file mode 100644
index 0000000..f40aa90
--- /dev/null
+++ b/t/replace_img.t
@@ -0,0 +1,62 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my @texts = (
+ {
+ label => "<img ... />",
+ html => q{<img alt="foo bar" src="http://example.com/foo.jpg" />},
+ },
+ {
+ label => "<img ... ></img>",
+ html => q{<img alt="foo bar" src="http://example.com/foo.jpg"></img>},
+ },
+);
+
+my @cases = (
+ {
+ label => "default args",
+ args => {},
+ expect => undef,
+ },
+ {
+ label => "replace_img => 0",
+ args => { replace_img => 0 },
+ expect => undef,
+ },
+ {
+ label => "replace_img => 1",
+ args => { replace_img => 1 },
+ expect => q{[IMAGE: foo bar]},
+ },
+ {
+ label => "replace_img => CODE",
+ args => { replace_img => \&replacer },
+ expect => q{[IMAGE REMOVED: foo bar]},
+ },
+);
+
+sub replacer {
+ my ( $tag, $attr, $text ) = @_;
+ return "[IMAGE REMOVED: $attr->{alt}]";
+}
+
+for my $c (@cases) {
+ ok(
+ my $hr = HTML::Restrict->new( debug => 0, %{ $c->{args} } ),
+ "$c->{label}: HTML::Restrict->new(...)"
+ );
+ for my $t (@texts) {
+ is(
+ $hr->process( $t->{html} ), $c->{expect},
+ "$c->{label}: $t->{label}"
+ );
+ }
+}
+
+done_testing();
diff --git a/t/scheme.t b/t/scheme.t
new file mode 100644
index 0000000..8bebd90
--- /dev/null
+++ b/t/scheme.t
@@ -0,0 +1,70 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+ rules => {
+ a => [qw( href )],
+ img => [qw( src /)],
+ blockquote => [qw( cite )],
+ },
+);
+
+$hr->set_uri_schemes( [ 'http', 'https', undef, 'ftp' ] );
+
+cmp_ok(
+ $hr->process('<a href="http://example.com">link</a>'),
+ 'eq', '<a href="http://example.com">link</a>',
+ 'http scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="https://example.com">link</a>'),
+ 'eq', '<a href="https://example.com">link</a>',
+ 'https scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="/some/file">link</a>'),
+ 'eq', '<a href="/some/file">link</a>',
+ 'relative scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="ftp://example.com">link</a>'),
+ 'eq', '<a href="ftp://example.com">link</a>',
+ 'ftp scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="file://example.com">link</a>'),
+ 'eq', '<a>link</a>',
+ 'file scheme removed',
+);
+
+cmp_ok(
+ $hr->process('<img src="javascript:evil_fc()" />'),
+ 'eq', '<img />',
+ 'img src with javascript removed',
+);
+
+cmp_ok(
+ $hr->process(
+ '<blockquote cite="javascript:evil_fc()">blockquote</blockquote>'),
+ 'eq',
+ '<blockquote>blockquote</blockquote>',
+ 'blockquote cite with javascript removed',
+);
+
+# disable relative schemes
+$hr->set_uri_schemes( [ 'http', 'https', 'ftp' ] );
+
+cmp_ok(
+ $hr->process('<a href="/some/file">link</a>'),
+ 'eq', '<a>link</a>',
+ 'relative scheme removed',
+);
+
+done_testing();
diff --git a/t/stack.t b/t/stack.t
new file mode 100644
index 0000000..b95f0ff
--- /dev/null
+++ b/t/stack.t
@@ -0,0 +1,19 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+
+ok( !@{ $hr->_stripper_stack }, "stack empty" );
+
+push @{ $hr->_stripper_stack }, 'script', 'style', 'pre', 'script';
+$hr->_delete_tag_from_stack('script');
+is_deeply(
+ $hr->_stripper_stack,
+ [ 'script', 'style', 'pre' ],
+ 'deletes from stack in correct order and amount'
+);
+
+done_testing();
diff --git a/t/style.t b/t/style.t
new file mode 100644
index 0000000..2491fa5
--- /dev/null
+++ b/t/style.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<style type="text/css">
+hr {color:sienna;}
+p {margin-left:20px;}
+body {background-image:url("images/back40.gif");}
+</style>];
+
+is( $hr->process($html), undef, "content of style tag removed by default" );
+
+$hr->set_rules( { style => ['type'] } );
+
+is( $hr->process($html), $html, "content of style tag preserved" );
+
+done_testing();
diff --git a/t/xss.t b/t/xss.t
new file mode 100755
index 0000000..0f98789
--- /dev/null
+++ b/t/xss.t
@@ -0,0 +1,35 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+$hr->debug(0);
+$hr->set_rules( { a => [ 'href', 'class' ] } );
+
+my $text = '<a href="javascript:alert(1)">oops!</a>';
+
+my $clean = $hr->process($text);
+is $clean, '<a>oops!</a>', "bad scheme removed";
+
+is $hr->process('<a href="javascript&#58;evil_script()">evil</a>'),
+ '<a>evil</a>', 'bad scheme removed';
+
+foreach my $uri (
+ 'http://vilerichard.com', 'https://vilerichard.com',
+ '//vilerichard.com', '/music'
+ ) {
+ my $img = qq[<a href="$uri">click</a>];
+ is $hr->process($img), $img, "good uri scheme preserved";
+}
+
+is $hr->process(
+ '<a class="&quot;&gt;&lt;script&gt;alert(&quot;oops&quot;);&lt;/script&gt;&lt;a href=&quot;"></a>'
+ ),
+ '<a class="&quot;&gt;&lt;script&gt;alert(&quot;oops&quot;);&lt;/script&gt;&lt;a href=&quot;"></a>',
+ 'attribute value filtered';
+
+done_testing();