summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author gregor herrmann <gregoa@debian.org> 2019-11-14 20:01:20 +0100
committer gregor herrmann <gregoa@debian.org> 2019-11-14 20:01:20 +0100
commit 99f40d321f7a8f964755bca97bae0f7e7fb4ba60 (patch)
tree 050146ff9b45044cad350c49d47cef27beb98fcc
parent a68f9a18fd0b69b09f10ab150255615159ab4cd3 (diff)
parent d8fef30a362c53b02257b701ba3715b89838825f (diff)
Update upstream source from tag 'upstream/3.0.0'
Update to upstream version '3.0.0' with Debian dir eeb9d119ee0e5e23b0dbf4c6e07ceae6bad0bac9
-rw-r--r-- Changes 6
-rw-r--r-- MANIFEST 1
-rw-r--r-- META.json 42
-rw-r--r-- META.yml 30
-rw-r--r-- Makefile.PL 14
-rw-r--r-- README.md 26
-rw-r--r-- cpanfile 6
-rw-r--r-- dist.ini 4
-rw-r--r-- lib/HTML/Restrict.pm 102
-rw-r--r-- lib/HTML/Restrict/Types.pm 50
-rw-r--r-- t/00-report-prereqs.dd 12
-rw-r--r-- t/empty-element-tags.t 6
-rwxr-xr-x t/malformed-html.t 28
-rw-r--r-- xt/author/pod-spell.t 1
14 files changed, 128 insertions, 200 deletions
diff --git a/Changes b/Changes
index 37d6e96..40d96cd 100644
--- a/Changes
+++ b/Changes
@@ -1,8 +1,8 @@
Revision history for HTML-Restrict
-v2.5.0 2019-02-08 22:18:11Z
- - Strip some control characters from links (GH#34) (Olaf Alders)
- - Enable empty_element_tags in HTML::Parser (GH#35) (Olaf Alders)
+v3.0.0 2019-03-11 13:52:08Z
+ - Better fix for handling malformed tags. Removes max_parser_loops(),
+ which was introduced in 2.4.0 (GH#37) (Graham Knop)
v2.4.1 2019-02-05 14:13:16Z
- Bump version of Type::Tiny to 1.002001. (GH#33) (Olaf Alders). Issue
diff --git a/MANIFEST b/MANIFEST
index 730056a..e2732ec 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -14,7 +14,6 @@ dist.ini
examples/naughty-strings.pl
examples/sanitize-file.pl
lib/HTML/Restrict.pm
-lib/HTML/Restrict/Types.pm
perlcriticrc
perltidyrc
t/00-load.t
diff --git a/META.json b/META.json
index de1c81d..a7b9504 100644
--- a/META.json
+++ b/META.json
@@ -48,7 +48,8 @@
"Test::Pod::Coverage" : "1.08",
"Test::Spelling" : "0.12",
"Test::Synopsis" : "0",
- "Test::Vars" : "0.014"
+ "Test::Vars" : "0.014",
+ "warnings" : "0"
}
},
"runtime" : {
@@ -61,15 +62,13 @@
"Moo" : "1.002000",
"Scalar::Util" : "0",
"Sub::Quote" : "0",
- "Type::Library" : "0",
"Type::Tiny" : "1.002001",
- "Type::Utils" : "0",
+ "Types::Standard" : "1.000001",
"URI" : "0",
"namespace::clean" : "0",
"perl" : "5.006",
"strict" : "0",
- "version" : "0",
- "warnings" : "0"
+ "version" : "0"
}
},
"test" : {
@@ -81,7 +80,8 @@
"File::Spec" : "0",
"Test::Fatal" : "0",
"Test::More" : "0",
- "perl" : "5.006"
+ "perl" : "5.006",
+ "warnings" : "0"
}
}
},
@@ -97,7 +97,7 @@
"web" : "https://github.com/oalders/html-restrict"
}
},
- "version" : "v2.5.0",
+ "version" : "v3.0.0",
"x_Dist_Zilla" : {
"perl" : {
"version" : "5.026001"
@@ -138,7 +138,7 @@
{
"class" : "Dist::Zilla::Plugin::MAXMIND::TidyAll",
"name" : "@Author::OALDERS/MAXMIND::TidyAll",
- "version" : "0.83"
+ "version" : "0.13"
},
{
"class" : "Dist::Zilla::Plugin::AutoPrereqs",
@@ -629,7 +629,7 @@
"branch" : null,
"changelog" : "Changes",
"signed" : 0,
- "tag" : "vv2.5.0",
+ "tag" : "vv3.0.0",
"tag_format" : "v%v",
"tag_message" : "v%v"
},
@@ -723,6 +723,28 @@
"version" : "6.012"
},
{
+ "class" : "Dist::Zilla::Plugin::Git::Tag",
+ "config" : {
+ "Dist::Zilla::Plugin::Git::Tag" : {
+ "branch" : null,
+ "changelog" : "Changes",
+ "signed" : 0,
+ "tag" : "v3.0.0",
+ "tag_format" : "%v",
+ "tag_message" : "%v"
+ },
+ "Dist::Zilla::Role::Git::Repo" : {
+ "git_version" : "2.20.1",
+ "repo_root" : "."
+ },
+ "Dist::Zilla::Role::Git::StringFormatter" : {
+ "time_zone" : "local"
+ }
+ },
+ "name" : "Git::Tag",
+ "version" : "2.045"
+ },
+ {
"class" : "Dist::Zilla::Plugin::FinderCode",
"name" : ":InstallModules",
"version" : "6.012"
@@ -795,6 +817,6 @@
"skaufman <sam@socialflow.com>"
],
"x_generated_by_perl" : "v5.26.1",
- "x_serialization_backend" : "Cpanel::JSON::XS version 4.04"
+ "x_serialization_backend" : "Cpanel::JSON::XS version 4.02"
}
diff --git a/META.yml b/META.yml
index 9830551..059147a 100644
--- a/META.yml
+++ b/META.yml
@@ -8,6 +8,7 @@ build_requires:
Test::Fatal: '0'
Test::More: '0'
perl: '5.006'
+ warnings: '0'
configure_requires:
ExtUtils::MakeMaker: '0'
perl: '5.006'
@@ -32,20 +33,18 @@ requires:
Moo: '1.002000'
Scalar::Util: '0'
Sub::Quote: '0'
- Type::Library: '0'
Type::Tiny: '1.002001'
- Type::Utils: '0'
+ Types::Standard: '1.000001'
URI: '0'
namespace::clean: '0'
perl: '5.006'
strict: '0'
version: '0'
- warnings: '0'
resources:
bugtracker: https://github.com/oalders/html-restrict/issues
homepage: https://github.com/oalders/html-restrict
repository: https://github.com/oalders/html-restrict.git
-version: v2.5.0
+version: v3.0.0
x_Dist_Zilla:
perl:
version: '5.026001'
@@ -78,7 +77,7 @@ x_Dist_Zilla:
-
class: Dist::Zilla::Plugin::MAXMIND::TidyAll
name: '@Author::OALDERS/MAXMIND::TidyAll'
- version: '0.83'
+ version: '0.13'
-
class: Dist::Zilla::Plugin::AutoPrereqs
name: '@Author::OALDERS/AutoPrereqs'
@@ -463,7 +462,7 @@ x_Dist_Zilla:
branch: ~
changelog: Changes
signed: 0
- tag: vv2.5.0
+ tag: vv3.0.0
tag_format: v%v
tag_message: v%v
Dist::Zilla::Role::Git::Repo:
@@ -531,6 +530,23 @@ x_Dist_Zilla:
name: Prereqs
version: '6.012'
-
+ class: Dist::Zilla::Plugin::Git::Tag
+ config:
+ Dist::Zilla::Plugin::Git::Tag:
+ branch: ~
+ changelog: Changes
+ signed: 0
+ tag: v3.0.0
+ tag_format: '%v'
+ tag_message: '%v'
+ Dist::Zilla::Role::Git::Repo:
+ git_version: 2.20.1
+ repo_root: .
+ Dist::Zilla::Role::Git::StringFormatter:
+ time_zone: local
+ name: Git::Tag
+ version: '2.045'
+ -
class: Dist::Zilla::Plugin::FinderCode
name: ':InstallModules'
version: '6.012'
@@ -588,4 +604,4 @@ x_contributors:
- 'perlpong <calyx238@gmail.com>'
- 'skaufman <sam@socialflow.com>'
x_generated_by_perl: v5.26.1
-x_serialization_backend: 'YAML::Tiny version 1.70'
+x_serialization_backend: 'YAML::Tiny version 1.73'
diff --git a/Makefile.PL b/Makefile.PL
index 34e8077..16e61d2 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -25,22 +25,21 @@ my %WriteMakefileArgs = (
"Moo" => "1.002000",
"Scalar::Util" => 0,
"Sub::Quote" => 0,
- "Type::Library" => 0,
"Type::Tiny" => "1.002001",
- "Type::Utils" => 0,
+ "Types::Standard" => "1.000001",
"URI" => 0,
"namespace::clean" => 0,
"strict" => 0,
- "version" => 0,
- "warnings" => 0
+ "version" => 0
},
"TEST_REQUIRES" => {
"ExtUtils::MakeMaker" => 0,
"File::Spec" => 0,
"Test::Fatal" => 0,
- "Test::More" => 0
+ "Test::More" => 0,
+ "warnings" => 0
},
- "VERSION" => "v2.5.0",
+ "VERSION" => "v3.0.0",
"test" => {
"TESTS" => "t/*.t"
}
@@ -60,9 +59,8 @@ my %FallbackPrereqs = (
"Sub::Quote" => 0,
"Test::Fatal" => 0,
"Test::More" => 0,
- "Type::Library" => 0,
"Type::Tiny" => "1.002001",
- "Type::Utils" => 0,
+ "Types::Standard" => "1.000001",
"URI" => 0,
"namespace::clean" => 0,
"strict" => 0,
diff --git a/README.md b/README.md
index 18d67b9..b47aa9c 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ HTML::Restrict - Strip unwanted HTML tags and attributes
# VERSION
-version v2.5.0
+version v3.0.0
# SYNOPSIS
@@ -233,30 +233,6 @@ HTML::Restrict recognizes:
$html = $hr->process( $html );
# $html is now: "<!-- comments! -->foo"
-- max\_parser\_loops => \[Integer\]
-
- Defaults to 25. Should never be less than 2.
-
- As of v2.4.0, calling `process()` will force the parser to clean the text
- multiple times, stopping only once the text is no longer changed or once
- `max_parser_loops` has been reached.
-
- The reason for this is that [HTML::Parser](https://metacpan.org/pod/HTML::Parser) could take malformed HTML and turn
- it into well formed HTML. This can defeat our processing logic and allow
- malicious input to be returned. In order to mitigate this, we will clean all
- input at least two times. If the second attempt at cleaning does not match
- the previous attempt, we will make a third attempt and so on. This helps to
- ensure that we get the expected output.
-
- If we are unable to get unchanged values after reaching `max_parser_loops`, an
- exception will be thrown. Returning partially cleaned text would be wrong, as
- would be returning `undef` or an empty string. Throwing an exception forces
- the user to choose the appropriate way of dealing with this.
-
- If you choose to set this value, please note that it can be no less than 2, or
- the parser will never be able to make a comparison with a previous value. An
- exception will be thrown if you attempt to set this to a value less than 2.
-
- replace\_img => \[0|1|CodeRef\]
Set the value to true if you'd like to have img tags replaced with
diff --git a/cpanfile b/cpanfile
index 1cf9d0d..6e5d895 100644
--- a/cpanfile
+++ b/cpanfile
@@ -6,15 +6,13 @@ requires "List::Util" => "1.33";
requires "Moo" => "1.002000";
requires "Scalar::Util" => "0";
requires "Sub::Quote" => "0";
-requires "Type::Library" => "0";
requires "Type::Tiny" => "1.002001";
-requires "Type::Utils" => "0";
+requires "Types::Standard" => "1.000001";
requires "URI" => "0";
requires "namespace::clean" => "0";
requires "perl" => "5.006";
requires "strict" => "0";
requires "version" => "0";
-requires "warnings" => "0";
on 'test' => sub {
requires "ExtUtils::MakeMaker" => "0";
@@ -22,6 +20,7 @@ on 'test' => sub {
requires "Test::Fatal" => "0";
requires "Test::More" => "0";
requires "perl" => "5.006";
+ requires "warnings" => "0";
};
on 'test' => sub {
@@ -51,6 +50,7 @@ on 'develop' => sub {
requires "Test::Spelling" => "0.12";
requires "Test::Synopsis" => "0";
requires "Test::Vars" => "0.014";
+ requires "warnings" => "0";
};
on 'develop' => sub {
diff --git a/dist.ini b/dist.ini
index c8bcf1b..30e9483 100644
--- a/dist.ini
+++ b/dist.ini
@@ -9,3 +9,7 @@ main_module = lib/HTML/Restrict.pm
[Prereqs]
Type::Tiny = 1.002001
+
+[Git::Tag]
+tag_format = %v
+tag_message = %v
diff --git a/lib/HTML/Restrict.pm b/lib/HTML/Restrict.pm
index 6c2424d..fce0fba 100644
--- a/lib/HTML/Restrict.pm
+++ b/lib/HTML/Restrict.pm
@@ -4,20 +4,13 @@ use 5.006;
package HTML::Restrict;
use version;
-our $VERSION = 'v2.5.0';
+our $VERSION = 'v3.0.0';
use Carp qw( croak );
use Data::Dump qw( dump );
-use HTML::Entities qw( encode_entities );
use HTML::Parser ();
-use HTML::Restrict::Types qw(
- ArrayRef
- Bool
- CodeRef
- HashRef
- Int
- MaxParserLoops
-);
+use HTML::Entities qw( encode_entities );
+use Types::Standard 1.000001 qw[ Bool HashRef ArrayRef CodeRef ];
use List::Util 1.33 qw( any none );
use Scalar::Util qw( reftype weaken );
use Sub::Quote 'quote_sub';
@@ -44,12 +37,6 @@ has debug => (
default => 0,
);
-has max_parser_loops => (
- is => 'rw',
- isa => MaxParserLoops,
- default => 25,
-);
-
has parser => (
is => 'ro',
lazy => 1,
@@ -280,6 +267,7 @@ sub _build_parser {
sub {
my ( $p, $text ) = @_;
print "text: $text\n" if $self->debug;
+ $text = _fix_text_encoding($text);
if ( !@{ $self->_stripper_stack } ) {
$self->_processed( ( $self->_processed || q{} ) . $text );
}
@@ -315,29 +303,6 @@ sub _build_parser {
sub process {
my $self = shift;
- my $cleaned = $self->_process(@_);
- return $cleaned if !$cleaned;
-
- my $previous_iteration = $cleaned;
-
- my $i = 1; # We already cleaned once just above
-
- while ( $i < $self->max_parser_loops ) {
- $i++;
- my $new = $self->_process($previous_iteration);
- last if $new eq $previous_iteration;
- if ( $i == $self->max_parser_loops ) {
- die sprintf( 'Could not clean input after %s attempts', $i );
- }
-
- $previous_iteration = $new;
- }
- return $previous_iteration;
-}
-
-sub _process {
- my $self = shift;
-
# returns undef if no value was passed
return if !@_;
return $_[0] if !$_[0];
@@ -388,6 +353,39 @@ sub _delete_tag_from_stack {
return;
}
+# regex for entities that don't require a terminating semicolon
+my ($short_entity_re)
+ = map qr/$_/i,
+ join '|',
+ '#x[0-9a-f]+',
+ '#[0-9]+',
+ grep !/;\z/,
+ sort keys %HTML::Entities::entity2char;
+
+# semicolon required
+my ($complete_entity_re)
+ = map qr/$_/i,
+ join '|',
+ grep /;\z/,
+ sort keys %HTML::Entities::entity2char;
+
+sub _fix_text_encoding {
+ my $text = shift;
+ $text =~ s{
+ &
+ (?:
+ ($short_entity_re);?
+ |
+ ($complete_entity_re)
+ )?
+ }{
+ defined $1 ? "&$1;"
+ : defined $2 ? "&$2"
+ : "&amp;"
+ }xgie;
+ return encode_entities( $text, '<>' );
+}
+
1; # End of HTML::Restrict
# ABSTRACT: Strip unwanted HTML tags and attributes
@@ -404,7 +402,7 @@ HTML::Restrict - Strip unwanted HTML tags and attributes
=head1 VERSION
-version v2.5.0
+version v3.0.0
=head1 SYNOPSIS
@@ -635,30 +633,6 @@ feature is off by default.
$html = $hr->process( $html );
# $html is now: "<!-- comments! -->foo"
-=item * max_parser_loops => [Integer]
-
-Defaults to 25. Should never be less than 2.
-
-As of v2.4.0, calling C<process()> will force the parser to clean the text
-multiple times, stopping only once the text is no longer changed or once
-C<max_parser_loops> has been reached.
-
-The reason for this is that L<HTML::Parser> could take malformed HTML and turn
-it into well formed HTML. This can defeat our processing logic and allow
-malicious input to be returned. In order to mitigate this, we will clean all
-input at least two times. If the second attempt at cleaning does not match
-the previous attempt, we will make a third attempt and so on. This helps to
-ensure that we get the expected output.
-
-If we are unable to get unchanged values after reaching C<max_parser_loops>, an
-exception will be thrown. Returning partially cleaned text would be wrong, as
-would be returning C<undef> or an empty string. Throwing an exception forces
-the user to choose the appropriate way of dealing with this.
-
-If you choose to set this value, please note that it can be no less than 2, or
-the parser will never be able to make a comparison with a previous value. An
-exception will be thrown if you attempt to set this to a value less than 2.
-
=item * replace_img => [0|1|CodeRef]
Set the value to true if you'd like to have img tags replaced with
diff --git a/lib/HTML/Restrict/Types.pm b/lib/HTML/Restrict/Types.pm
deleted file mode 100644
index ba80a1e..0000000
--- a/lib/HTML/Restrict/Types.pm
+++ /dev/null
@@ -1,50 +0,0 @@
-package HTML::Restrict::Types;
-our $VERSION = 'v2.5.0';
-use strict;
-use warnings;
-
-use Type::Library -base;
-use Type::Utils ();
-
-BEGIN {
- Type::Utils::extends( 'Types::Common::Numeric', 'Types::Standard', );
-}
-
-__PACKAGE__->add_type(
- {
- name => 'MaxParserLoops',
- parent => PositiveInt,
- constraint => '$_ >= 2',
- }
-);
-
-1;
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-HTML::Restrict::Types - Type library for HTML::Restrict
-
-=head1 VERSION
-
-version v2.5.0
-
-=head1 AUTHOR
-
-Olaf Alders <olaf@wundercounter.com>
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is copyright (c) 2013-2017 by Olaf Alders.
-
-This is free software; you can redistribute it and/or modify it under
-the same terms as the Perl 5 programming language system itself.
-
-=cut
-
-__END__
-
-# ABSTRACT: Type library for HTML::Restrict
diff --git a/t/00-report-prereqs.dd b/t/00-report-prereqs.dd
index ce615bf..31bf62d 100644
--- a/t/00-report-prereqs.dd
+++ b/t/00-report-prereqs.dd
@@ -26,7 +26,8 @@ do { my $x = {
'Test::Pod::Coverage' => '1.08',
'Test::Spelling' => '0.12',
'Test::Synopsis' => '0',
- 'Test::Vars' => '0.014'
+ 'Test::Vars' => '0.014',
+ 'warnings' => '0'
}
},
'runtime' => {
@@ -39,15 +40,13 @@ do { my $x = {
'Moo' => '1.002000',
'Scalar::Util' => '0',
'Sub::Quote' => '0',
- 'Type::Library' => '0',
'Type::Tiny' => '1.002001',
- 'Type::Utils' => '0',
+ 'Types::Standard' => '1.000001',
'URI' => '0',
'namespace::clean' => '0',
'perl' => '5.006',
'strict' => '0',
- 'version' => '0',
- 'warnings' => '0'
+ 'version' => '0'
}
},
'test' => {
@@ -59,7 +58,8 @@ do { my $x = {
'File::Spec' => '0',
'Test::Fatal' => '0',
'Test::More' => '0',
- 'perl' => '5.006'
+ 'perl' => '5.006',
+ 'warnings' => '0'
}
}
};
diff --git a/t/empty-element-tags.t b/t/empty-element-tags.t
index ce9cd9a..e6dc3f0 100644
--- a/t/empty-element-tags.t
+++ b/t/empty-element-tags.t
@@ -12,9 +12,9 @@ one element open & close break no space<br/>
EOF
my $after = <<'EOF';
-two element open & close break<br></br>
-one element open & close break <br>
-one element open & close break no space<br>
+two element open &amp; close break<br></br>
+one element open &amp; close break <br>
+one element open &amp; close break no space<br>
EOF
my $hr = HTML::Restrict->new(
diff --git a/t/malformed-html.t b/t/malformed-html.t
index afb1288..d838ec1 100755
--- a/t/malformed-html.t
+++ b/t/malformed-html.t
@@ -22,31 +22,21 @@ my $html = '<<input>div onmouseover="alert(1);">hover over me<<input>/div>';
{
my $hr = HTML::Restrict->new;
is(
- $hr->process($html), 'hover over me',
+ $hr->process(
+ '<<input>div onmouseover="alert(1);">hover over me<<input>/div>'),
+ '&lt;div onmouseover="alert(1);"&gt;hover over me&lt;/div&gt;',
'malformed HTML is correctly cleaned'
);
}
{
- my $attempts = 2;
- my $hr = HTML::Restrict->new( max_parser_loops => $attempts );
- like(
- exception { $hr->process($html) },
- qr/after $attempts attempts/,
- 'dies after max loops exceeded',
+ my $hr = HTML::Restrict->new;
+ is(
+ $hr->process(
+ '&<input></input>lt; &theta; &aMp; &#50; &#x50; &#xabg;'),
+ '&amp;lt; &theta; &aMp; &#50; &#x50; &#xab;g;',
+ 'badly encoded entities corrected'
);
- $hr->max_parser_loops(3);
- is( $hr->process('<foo>bar'), 'bar', 'can parse after caught exception' );
-}
-
-{
- for my $i ( -1 .. 1 ) {
- like(
- exception { HTML::Restrict->new( max_parser_loops => $i ) },
- qr/did not pass type constraint/i,
- 'max_parser_loops cannot be ' . $i,
- );
- }
}
done_testing();
diff --git a/xt/author/pod-spell.t b/xt/author/pod-spell.t
index 5a8e4d9..a813d52 100644
--- a/xt/author/pod-spell.t
+++ b/xt/author/pod-spell.t
@@ -38,7 +38,6 @@ Raybec
Restrict
Schmidt
TerMarsch
-Types
XSS
benkasminbullock
bolded