diff options
author | gregor herrmann <gregoa@debian.org> | 2019-11-14 20:01:20 +0100 |
---|---|---|
committer | gregor herrmann <gregoa@debian.org> | 2019-11-14 20:01:20 +0100 |
commit | 99f40d321f7a8f964755bca97bae0f7e7fb4ba60 (patch) | |
tree | 050146ff9b45044cad350c49d47cef27beb98fcc | |
parent | a68f9a18fd0b69b09f10ab150255615159ab4cd3 (diff) | |
parent | d8fef30a362c53b02257b701ba3715b89838825f (diff) |
Update upstream source from tag 'upstream/3.0.0'
Update to upstream version '3.0.0'
with Debian dir eeb9d119ee0e5e23b0dbf4c6e07ceae6bad0bac9
-rw-r--r-- | Changes | 6 | ||||
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | META.json | 42 | ||||
-rw-r--r-- | META.yml | 30 | ||||
-rw-r--r-- | Makefile.PL | 14 | ||||
-rw-r--r-- | README.md | 26 | ||||
-rw-r--r-- | cpanfile | 6 | ||||
-rw-r--r-- | dist.ini | 4 | ||||
-rw-r--r-- | lib/HTML/Restrict.pm | 102 | ||||
-rw-r--r-- | lib/HTML/Restrict/Types.pm | 50 | ||||
-rw-r--r-- | t/00-report-prereqs.dd | 12 | ||||
-rw-r--r-- | t/empty-element-tags.t | 6 | ||||
-rwxr-xr-x | t/malformed-html.t | 28 | ||||
-rw-r--r-- | xt/author/pod-spell.t | 1 |
14 files changed, 128 insertions, 200 deletions
@@ -1,8 +1,8 @@ Revision history for HTML-Restrict -v2.5.0 2019-02-08 22:18:11Z - - Strip some control characters from links (GH#34) (Olaf Alders) - - Enable empty_element_tags in HTML::Parser (GH#35) (Olaf Alders) +v3.0.0 2019-03-11 13:52:08Z + - Better fix for handling malformed tags. Removes max_parser_loops(), + which was introduced in 2.4.0 (GH#37) (Graham Knop) v2.4.1 2019-02-05 14:13:16Z - Bump version of Type::Tiny to 1.002001. (GH#33) (Olaf Alders). Issue @@ -14,7 +14,6 @@ dist.ini examples/naughty-strings.pl examples/sanitize-file.pl lib/HTML/Restrict.pm -lib/HTML/Restrict/Types.pm perlcriticrc perltidyrc t/00-load.t @@ -48,7 +48,8 @@ "Test::Pod::Coverage" : "1.08", "Test::Spelling" : "0.12", "Test::Synopsis" : "0", - "Test::Vars" : "0.014" + "Test::Vars" : "0.014", + "warnings" : "0" } }, "runtime" : { @@ -61,15 +62,13 @@ "Moo" : "1.002000", "Scalar::Util" : "0", "Sub::Quote" : "0", - "Type::Library" : "0", "Type::Tiny" : "1.002001", - "Type::Utils" : "0", + "Types::Standard" : "1.000001", "URI" : "0", "namespace::clean" : "0", "perl" : "5.006", "strict" : "0", - "version" : "0", - "warnings" : "0" + "version" : "0" } }, "test" : { @@ -81,7 +80,8 @@ "File::Spec" : "0", "Test::Fatal" : "0", "Test::More" : "0", - "perl" : "5.006" + "perl" : "5.006", + "warnings" : "0" } } }, @@ -97,7 +97,7 @@ "web" : "https://github.com/oalders/html-restrict" } }, - "version" : "v2.5.0", + "version" : "v3.0.0", "x_Dist_Zilla" : { "perl" : { "version" : "5.026001" @@ -138,7 +138,7 @@ { "class" : "Dist::Zilla::Plugin::MAXMIND::TidyAll", "name" : "@Author::OALDERS/MAXMIND::TidyAll", - "version" : "0.83" + "version" : "0.13" }, { "class" : "Dist::Zilla::Plugin::AutoPrereqs", @@ -629,7 +629,7 @@ "branch" : null, "changelog" : "Changes", "signed" : 0, - "tag" : "vv2.5.0", + "tag" : "vv3.0.0", "tag_format" : "v%v", "tag_message" : "v%v" }, @@ -723,6 +723,28 @@ "version" : "6.012" }, { + "class" : "Dist::Zilla::Plugin::Git::Tag", + "config" : { + "Dist::Zilla::Plugin::Git::Tag" : { + "branch" : null, + "changelog" : "Changes", + "signed" : 0, + "tag" : "v3.0.0", + "tag_format" : "%v", + "tag_message" : "%v" + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.20.1", + "repo_root" : "." + }, + "Dist::Zilla::Role::Git::StringFormatter" : { + "time_zone" : "local" + } + }, + "name" : "Git::Tag", + "version" : "2.045" + }, + { "class" : "Dist::Zilla::Plugin::FinderCode", "name" : ":InstallModules", "version" : "6.012" @@ -795,6 +817,6 @@ "skaufman <sam@socialflow.com>" ], "x_generated_by_perl" : "v5.26.1", - "x_serialization_backend" : "Cpanel::JSON::XS version 4.04" + "x_serialization_backend" : "Cpanel::JSON::XS version 4.02" } @@ -8,6 +8,7 @@ build_requires: Test::Fatal: '0' Test::More: '0' perl: '5.006' + warnings: '0' configure_requires: ExtUtils::MakeMaker: '0' perl: '5.006' @@ -32,20 +33,18 @@ requires: Moo: '1.002000' Scalar::Util: '0' Sub::Quote: '0' - Type::Library: '0' Type::Tiny: '1.002001' - Type::Utils: '0' + Types::Standard: '1.000001' URI: '0' namespace::clean: '0' perl: '5.006' strict: '0' version: '0' - warnings: '0' resources: bugtracker: https://github.com/oalders/html-restrict/issues homepage: https://github.com/oalders/html-restrict repository: https://github.com/oalders/html-restrict.git -version: v2.5.0 +version: v3.0.0 x_Dist_Zilla: perl: version: '5.026001' @@ -78,7 +77,7 @@ x_Dist_Zilla: - class: Dist::Zilla::Plugin::MAXMIND::TidyAll name: '@Author::OALDERS/MAXMIND::TidyAll' - version: '0.83' + version: '0.13' - class: Dist::Zilla::Plugin::AutoPrereqs name: '@Author::OALDERS/AutoPrereqs' @@ -463,7 +462,7 @@ x_Dist_Zilla: branch: ~ changelog: Changes signed: 0 - tag: vv2.5.0 + tag: vv3.0.0 tag_format: v%v tag_message: v%v Dist::Zilla::Role::Git::Repo: @@ -531,6 +530,23 @@ x_Dist_Zilla: name: Prereqs version: '6.012' - + class: Dist::Zilla::Plugin::Git::Tag + config: + Dist::Zilla::Plugin::Git::Tag: + branch: ~ + changelog: Changes + signed: 0 + tag: v3.0.0 + tag_format: '%v' + tag_message: '%v' + Dist::Zilla::Role::Git::Repo: + git_version: 2.20.1 + repo_root: . + Dist::Zilla::Role::Git::StringFormatter: + time_zone: local + name: Git::Tag + version: '2.045' + - class: Dist::Zilla::Plugin::FinderCode name: ':InstallModules' version: '6.012' @@ -588,4 +604,4 @@ x_contributors: - 'perlpong <calyx238@gmail.com>' - 'skaufman <sam@socialflow.com>' x_generated_by_perl: v5.26.1 -x_serialization_backend: 'YAML::Tiny version 1.70' +x_serialization_backend: 'YAML::Tiny version 1.73' diff --git a/Makefile.PL b/Makefile.PL index 34e8077..16e61d2 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -25,22 +25,21 @@ my %WriteMakefileArgs = ( "Moo" => "1.002000", "Scalar::Util" => 0, "Sub::Quote" => 0, - "Type::Library" => 0, "Type::Tiny" => "1.002001", - "Type::Utils" => 0, + "Types::Standard" => "1.000001", "URI" => 0, "namespace::clean" => 0, "strict" => 0, - "version" => 0, - "warnings" => 0 + "version" => 0 }, "TEST_REQUIRES" => { "ExtUtils::MakeMaker" => 0, "File::Spec" => 0, "Test::Fatal" => 0, - "Test::More" => 0 + "Test::More" => 0, + "warnings" => 0 }, - "VERSION" => "v2.5.0", + "VERSION" => "v3.0.0", "test" => { "TESTS" => "t/*.t" } @@ -60,9 +59,8 @@ my %FallbackPrereqs = ( "Sub::Quote" => 0, "Test::Fatal" => 0, "Test::More" => 0, - "Type::Library" => 0, "Type::Tiny" => "1.002001", - "Type::Utils" => 0, + "Types::Standard" => "1.000001", "URI" => 0, "namespace::clean" => 0, "strict" => 0, @@ -4,7 +4,7 @@ HTML::Restrict - Strip unwanted HTML tags and attributes # VERSION -version v2.5.0 +version v3.0.0 # SYNOPSIS @@ -233,30 +233,6 @@ HTML::Restrict recognizes: $html = $hr->process( $html ); # $html is now: "<!-- comments! -->foo" -- max\_parser\_loops => \[Integer\] - - Defaults to 25. Should never be less than 2. - - As of v2.4.0, calling `process()` will force the parser to clean the text - multiple times, stopping only once the text is no longer changed or once - `max_parser_loops` has been reached. - - The reason for this is that [HTML::Parser](https://metacpan.org/pod/HTML::Parser) could take malformed HTML and turn - it into well formed HTML. This can defeat our processing logic and allow - malicious input to be returned. In order to mitigate this, we will clean all - input at least two times. If the second attempt at cleaning does not match - the previous attempt, we will make a third attempt and so on. This helps to - ensure that we get the expected output. - - If we are unable to get unchanged values after reaching `max_parser_loops`, an - exception will be thrown. Returning partially cleaned text would be wrong, as - would be returning `undef` or an empty string. Throwing an exception forces - the user to choose the appropriate way of dealing with this. - - If you choose to set this value, please note that it can be no less than 2, or - the parser will never be able to make a comparison with a previous value. An - exception will be thrown if you attempt to set this to a value less than 2. - - replace\_img => \[0|1|CodeRef\] Set the value to true if you'd like to have img tags replaced with @@ -6,15 +6,13 @@ requires "List::Util" => "1.33"; requires "Moo" => "1.002000"; requires "Scalar::Util" => "0"; requires "Sub::Quote" => "0"; -requires "Type::Library" => "0"; requires "Type::Tiny" => "1.002001"; -requires "Type::Utils" => "0"; +requires "Types::Standard" => "1.000001"; requires "URI" => "0"; requires "namespace::clean" => "0"; requires "perl" => "5.006"; requires "strict" => "0"; requires "version" => "0"; -requires "warnings" => "0"; on 'test' => sub { requires "ExtUtils::MakeMaker" => "0"; @@ -22,6 +20,7 @@ on 'test' => sub { requires "Test::Fatal" => "0"; requires "Test::More" => "0"; requires "perl" => "5.006"; + requires "warnings" => "0"; }; on 'test' => sub { @@ -51,6 +50,7 @@ on 'develop' => sub { requires "Test::Spelling" => "0.12"; requires "Test::Synopsis" => "0"; requires "Test::Vars" => "0.014"; + requires "warnings" => "0"; }; on 'develop' => sub { @@ -9,3 +9,7 @@ main_module = lib/HTML/Restrict.pm [Prereqs] Type::Tiny = 1.002001 + +[Git::Tag] +tag_format = %v +tag_message = %v diff --git a/lib/HTML/Restrict.pm b/lib/HTML/Restrict.pm index 6c2424d..fce0fba 100644 --- a/lib/HTML/Restrict.pm +++ b/lib/HTML/Restrict.pm @@ -4,20 +4,13 @@ use 5.006; package HTML::Restrict; use version; -our $VERSION = 'v2.5.0'; +our $VERSION = 'v3.0.0'; use Carp qw( croak ); use Data::Dump qw( dump ); -use HTML::Entities qw( encode_entities ); use HTML::Parser (); -use HTML::Restrict::Types qw( - ArrayRef - Bool - CodeRef - HashRef - Int - MaxParserLoops -); +use HTML::Entities qw( encode_entities ); +use Types::Standard 1.000001 qw[ Bool HashRef ArrayRef CodeRef ]; use List::Util 1.33 qw( any none ); use Scalar::Util qw( reftype weaken ); use Sub::Quote 'quote_sub'; @@ -44,12 +37,6 @@ has debug => ( default => 0, ); -has max_parser_loops => ( - is => 'rw', - isa => MaxParserLoops, - default => 25, -); - has parser => ( is => 'ro', lazy => 1, @@ -280,6 +267,7 @@ sub _build_parser { sub { my ( $p, $text ) = @_; print "text: $text\n" if $self->debug; + $text = _fix_text_encoding($text); if ( !@{ $self->_stripper_stack } ) { $self->_processed( ( $self->_processed || q{} ) . $text ); } @@ -315,29 +303,6 @@ sub _build_parser { sub process { my $self = shift; - my $cleaned = $self->_process(@_); - return $cleaned if !$cleaned; - - my $previous_iteration = $cleaned; - - my $i = 1; # We already cleaned once just above - - while ( $i < $self->max_parser_loops ) { - $i++; - my $new = $self->_process($previous_iteration); - last if $new eq $previous_iteration; - if ( $i == $self->max_parser_loops ) { - die sprintf( 'Could not clean input after %s attempts', $i ); - } - - $previous_iteration = $new; - } - return $previous_iteration; -} - -sub _process { - my $self = shift; - # returns undef if no value was passed return if !@_; return $_[0] if !$_[0]; @@ -388,6 +353,39 @@ sub _delete_tag_from_stack { return; } +# regex for entities that don't require a terminating semicolon +my ($short_entity_re) + = map qr/$_/i, + join '|', + '#x[0-9a-f]+', + '#[0-9]+', + grep !/;\z/, + sort keys %HTML::Entities::entity2char; + +# semicolon required +my ($complete_entity_re) + = map qr/$_/i, + join '|', + grep /;\z/, + sort keys %HTML::Entities::entity2char; + +sub _fix_text_encoding { + my $text = shift; + $text =~ s{ + & + (?: + ($short_entity_re);? + | + ($complete_entity_re) + )? + }{ + defined $1 ? "&$1;" + : defined $2 ? "&$2" + : "&" + }xgie; + return encode_entities( $text, '<>' ); +} + 1; # End of HTML::Restrict # ABSTRACT: Strip unwanted HTML tags and attributes @@ -404,7 +402,7 @@ HTML::Restrict - Strip unwanted HTML tags and attributes =head1 VERSION -version v2.5.0 +version v3.0.0 =head1 SYNOPSIS @@ -635,30 +633,6 @@ feature is off by default. $html = $hr->process( $html ); # $html is now: "<!-- comments! -->foo" -=item * max_parser_loops => [Integer] - -Defaults to 25. Should never be less than 2. - -As of v2.4.0, calling C<process()> will force the parser to clean the text -multiple times, stopping only once the text is no longer changed or once -C<max_parser_loops> has been reached. - -The reason for this is that L<HTML::Parser> could take malformed HTML and turn -it into well formed HTML. This can defeat our processing logic and allow -malicious input to be returned. In order to mitigate this, we will clean all -input at least two times. If the second attempt at cleaning does not match -the previous attempt, we will make a third attempt and so on. This helps to -ensure that we get the expected output. - -If we are unable to get unchanged values after reaching C<max_parser_loops>, an -exception will be thrown. Returning partially cleaned text would be wrong, as -would be returning C<undef> or an empty string. Throwing an exception forces -the user to choose the appropriate way of dealing with this. - -If you choose to set this value, please note that it can be no less than 2, or -the parser will never be able to make a comparison with a previous value. An -exception will be thrown if you attempt to set this to a value less than 2. - =item * replace_img => [0|1|CodeRef] Set the value to true if you'd like to have img tags replaced with diff --git a/lib/HTML/Restrict/Types.pm b/lib/HTML/Restrict/Types.pm deleted file mode 100644 index ba80a1e..0000000 --- a/lib/HTML/Restrict/Types.pm +++ /dev/null @@ -1,50 +0,0 @@ -package HTML::Restrict::Types; -our $VERSION = 'v2.5.0'; -use strict; -use warnings; - -use Type::Library -base; -use Type::Utils (); - -BEGIN { - Type::Utils::extends( 'Types::Common::Numeric', 'Types::Standard', ); -} - -__PACKAGE__->add_type( - { - name => 'MaxParserLoops', - parent => PositiveInt, - constraint => '$_ >= 2', - } -); - -1; - -=pod - -=encoding UTF-8 - -=head1 NAME - -HTML::Restrict::Types - Type library for HTML::Restrict - -=head1 VERSION - -version v2.5.0 - -=head1 AUTHOR - -Olaf Alders <olaf@wundercounter.com> - -=head1 COPYRIGHT AND LICENSE - -This software is copyright (c) 2013-2017 by Olaf Alders. - -This is free software; you can redistribute it and/or modify it under -the same terms as the Perl 5 programming language system itself. - -=cut - -__END__ - -# ABSTRACT: Type library for HTML::Restrict diff --git a/t/00-report-prereqs.dd b/t/00-report-prereqs.dd index ce615bf..31bf62d 100644 --- a/t/00-report-prereqs.dd +++ b/t/00-report-prereqs.dd @@ -26,7 +26,8 @@ do { my $x = { 'Test::Pod::Coverage' => '1.08', 'Test::Spelling' => '0.12', 'Test::Synopsis' => '0', - 'Test::Vars' => '0.014' + 'Test::Vars' => '0.014', + 'warnings' => '0' } }, 'runtime' => { @@ -39,15 +40,13 @@ do { my $x = { 'Moo' => '1.002000', 'Scalar::Util' => '0', 'Sub::Quote' => '0', - 'Type::Library' => '0', 'Type::Tiny' => '1.002001', - 'Type::Utils' => '0', + 'Types::Standard' => '1.000001', 'URI' => '0', 'namespace::clean' => '0', 'perl' => '5.006', 'strict' => '0', - 'version' => '0', - 'warnings' => '0' + 'version' => '0' } }, 'test' => { @@ -59,7 +58,8 @@ do { my $x = { 'File::Spec' => '0', 'Test::Fatal' => '0', 'Test::More' => '0', - 'perl' => '5.006' + 'perl' => '5.006', + 'warnings' => '0' } } }; diff --git a/t/empty-element-tags.t b/t/empty-element-tags.t index ce9cd9a..e6dc3f0 100644 --- a/t/empty-element-tags.t +++ b/t/empty-element-tags.t @@ -12,9 +12,9 @@ one element open & close break no space<br/> EOF my $after = <<'EOF'; -two element open & close break<br></br> -one element open & close break <br> -one element open & close break no space<br> +two element open & close break<br></br> +one element open & close break <br> +one element open & close break no space<br> EOF my $hr = HTML::Restrict->new( diff --git a/t/malformed-html.t b/t/malformed-html.t index afb1288..d838ec1 100755 --- a/t/malformed-html.t +++ b/t/malformed-html.t @@ -22,31 +22,21 @@ my $html = '<<input>div onmouseover="alert(1);">hover over me<<input>/div>'; { my $hr = HTML::Restrict->new; is( - $hr->process($html), 'hover over me', + $hr->process( + '<<input>div onmouseover="alert(1);">hover over me<<input>/div>'), + '<div onmouseover="alert(1);">hover over me</div>', 'malformed HTML is correctly cleaned' ); } { - my $attempts = 2; - my $hr = HTML::Restrict->new( max_parser_loops => $attempts ); - like( - exception { $hr->process($html) }, - qr/after $attempts attempts/, - 'dies after max loops exceeded', + my $hr = HTML::Restrict->new; + is( + $hr->process( + '&<input></input>lt; θ &aMp; 2 P «g;'), + '&lt; θ &aMp; 2 P «g;', + 'badly encoded entities corrected' ); - $hr->max_parser_loops(3); - is( $hr->process('<foo>bar'), 'bar', 'can parse after caught exception' ); -} - -{ - for my $i ( -1 .. 1 ) { - like( - exception { HTML::Restrict->new( max_parser_loops => $i ) }, - qr/did not pass type constraint/i, - 'max_parser_loops cannot be ' . $i, - ); - } } done_testing(); diff --git a/xt/author/pod-spell.t b/xt/author/pod-spell.t index 5a8e4d9..a813d52 100644 --- a/xt/author/pod-spell.t +++ b/xt/author/pod-spell.t @@ -38,7 +38,6 @@ Raybec Restrict Schmidt TerMarsch -Types XSS benkasminbullock bolded |