summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Shadura <andrewsh@debian.org>2017-08-30 15:33:58 +0300
committerAndrew Shadura <andrewsh@debian.org>2017-08-30 15:33:58 +0300
commitec06787d2bdda608b8b24ca8b29d9edbea81ac6a (patch)
treeb535394603d7bf1b87e2c2dc83883ae61c449e91
Import original source of HTML-Restrict 2.2.4
-rw-r--r--CONTRIBUTORS21
-rw-r--r--Changes117
-rw-r--r--INSTALL43
-rw-r--r--LICENSE379
-rw-r--r--MANIFEST36
-rw-r--r--META.json95
-rw-r--r--META.yml56
-rw-r--r--Makefile.PL73
-rw-r--r--README.md349
-rw-r--r--cpanfile35
-rw-r--r--dist.ini9
-rwxr-xr-xexamples/sanitize_file.pl23
-rw-r--r--lib/HTML/Restrict.pm686
-rw-r--r--perlcriticrc86
-rw-r--r--perltidyrc12
-rw-r--r--t/00-load.t125
-rw-r--r--t/attribute_constraints.t66
-rw-r--r--t/author-pod-coverage.t15
-rw-r--r--t/author-pod-spell.t74
-rw-r--r--t/author-synopsis.t13
-rw-r--r--t/author-tidyall.t19
-rw-r--r--t/comments.t33
-rw-r--r--t/declaration.t22
-rw-r--r--t/js.t24
-rw-r--r--t/lowercase.t29
-rw-r--r--t/memory-leak.t24
-rw-r--r--t/perlcriticrc4
-rw-r--r--t/pod.t12
-rw-r--r--t/release-cpan-changes.t18
-rw-r--r--t/replace_img.t62
-rw-r--r--t/scheme.t70
-rw-r--r--t/stack.t19
-rw-r--r--t/style.t24
-rwxr-xr-xt/xss.t35
-rw-r--r--tidyall.ini26
35 files changed, 2734 insertions, 0 deletions
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..285239f
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,21 @@
+
+# HTML-RESTRICT CONTRIBUTORS #
+
+This is the (likely incomplete) list of people who have helped
+make this distribution what it is, either via code contributions,
+patches, bug reports, help with troubleshooting, etc. A huge
+'thank you' to all of them.
+
+ * Arthur Axel 'fREW' Schmidt
+ * Ben Bullock
+ * David Golden
+ * Fitz Elliott
+ * Graham Knop
+ * Graham TerMarsch
+ * Karen Etheridge
+ * Mark Jubenville
+ * Olaf Alders
+ * perlpong
+ * skaufman
+
+
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..8d6b733
--- /dev/null
+++ b/Changes
@@ -0,0 +1,117 @@
+Revision history for HTML-Restrict
+
+2.2.4 2017-03-01 14:22:13-05:00 America/Toronto
+ - Updates SEE ALSO pod section (Ben Bullock)
+
+2.2.3 2016-07-26 18:00:55-04:00 America/Toronto
+ - Now with fewer non-core prerequisites (Karen Etheridge)
+
+2.2.2 2014-10-16 22:16:29-04:00 America/Toronto
+ - Fixes attribute value encoding. (Graham Knop)
+
+2.2.1 2014-08-18 17:22:05EDT-0400 America/Toronto
+ - Replace MooX::Types::MooseLike with Type::Tiny. (Samuel Kaufman)
+
+2.2.0 2014-08-15 23:01:22EDT-0400 America/Toronto
+ - Fix Changes file
+ - Bump to next minor version rather than changing the format of the
+ version numbers
+
+2.1.9 2013-12-10
+ - Swaps namespace::clean for namespace::autoclean. (Fitz Elliott)
+
+2.1.8 2013-06-06
+ - Unlike 2.1.7, This release doesn't contain a local::lib.
+
+2.1.7 2013-06-05
+ - Removes Perl6::Junction in order to squash smartmatch warnings.
+
+2.1.6 2013-05-06
+ - Fixes incorrect use of Moo's weak_ref. (Thanks to Carwyn Ellis,
+ Dagfinn Ilmari Mannsåker and Graham Knop for help with this.)
+
+2.1.5 2013-05-01
+ - Eliminates a circular reference. (Graham TerMarsch)
+
+2.1.4 2013-04-19
+ - Minor version should have been bumped in previous release. To bump it
+ now would be confusing.
+ - Require Moo 1.001000 as 1.000008 breaks this module.
+
+2.1.3 2013-04-18
+ - Add replace_img feature (David Golden).
+ - Add 'cite' to list of attributes for URI scheme checks (David
+ Golden).
+
+2.1.2 2013-03-20
+ - Fixes broken Pod in example script.
+ - Adds x_contributors to META.json.
+
+2.1.1 2013-03-08
+ - Updates Pod.
+
+2.1.0 2013-03-02
+ - Allows attributes to be validated against regexes (perlpong).
+
+2.0.0 2013-02-27
+ - Bumps version to 2.0.0 due to new features breaking backwards
+ compatibility.
+ - Forces all tag rules to be provided in lower case. Breaks
+ backcompat, but mixed or upper case rules would have been silently
+ ignored. So, this is really a bug fix.
+ - Strips content between script tags by default. Breaks backcompat,
+ but the default behaviour was likely not what anyone wanted.
+ - Strips content between style tags by default. Breaks backcompat,
+ but the default behaviour was likely not what anyone wanted.
+ - Adds Pod for previously undocumented allow_comments,
+ allow_declaration and strip_enclosed_content.
+ - Ensures stripper stack is always reset (perlpong).
+ - Protects against undef string comparison (perlpong).
+ - Checks URI scheme for src attributes (perlpong).
+ - Removes accessor/mutator documentation. Would prefer if all params
+ were set at object instatiation.
+ - Adds ridiculously basic example script.
+
+
+1.0.4 2012-05-22
+ - Explicity require MooX::Types::MooseLike rather than
+ MooX::Types::MooseLike::Base (Matt Phillips/Github #2)
+
+1.0.3 2012-03-27
+ - Replaces Moose with Moo for speed in non-persistent apps (Arthur Axel
+ 'fREW' Schmidt)
+ - Adds uri scheme checking for href and src attributes
+ - Adds (undocumented) allow_comments() and allow_declartion() methods
+
+1.0.2 2011-10-11
+ - No longer emit warnings when pure HTML is supplied for processing
+ Rick Moore (RT #71553)
+
+1.0.1 2011-06-30
+ - Pod updates
+
+1.0.0 2011-06-30
+ - Bumped version to 1.0.0 This is no longer beta software.
+
+0.06 2010-03-22
+ - Fixed RT #55775 "Strings equating to boolean false returned as undef"
+ (Duncan Forsyth)
+
+0.05 2010-02-03
+ - process( undef ) returns without attempting to do anything. Avoids
+ warnings about uninitialized vars.
+
+0.04 2009-11-19
+ - MooseX::Params::Validate 0.12 now required. Some earlier versions do
+ not include pos_validated_list()
+
+0.03 2009-11-17
+ - process() no longer dies when passed an undef -- patch provided by
+ Mark Jubenville (ioncache)
+
+0.02 2009-09-17
+ - Improved documentation layout and clarity
+ Removed documentation for parser() method -- seems like a bad idea
+
+0.01 2009-09-17
+ - Beta release
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..5993a04
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,43 @@
+This is the Perl distribution HTML-Restrict.
+
+Installing HTML-Restrict is straightforward.
+
+## Installation with cpanm
+
+If you have cpanm, you only need one line:
+
+ % cpanm HTML::Restrict
+
+If it does not have permission to install modules to the current perl, cpanm
+will automatically set up and install to a local::lib in your home directory.
+See the local::lib documentation (https://metacpan.org/pod/local::lib) for
+details on enabling it in your environment.
+
+## Installing with the CPAN shell
+
+Alternatively, if your CPAN shell is set up, you should just be able to do:
+
+ % cpan HTML::Restrict
+
+## Manual installation
+
+As a last resort, you can manually install it. Download the tarball, untar it,
+then build it:
+
+ % perl Makefile.PL
+ % make && make test
+
+Then install it:
+
+ % make install
+
+If your perl is system-managed, you can create a local::lib in your home
+directory to install modules to. For details, see the local::lib documentation:
+https://metacpan.org/pod/local::lib
+
+## Documentation
+
+HTML-Restrict documentation is available as POD.
+You can run perldoc from a shell to read the documentation:
+
+ % perldoc HTML::Restrict
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5695594
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,379 @@
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+Terms of the Perl programming language system itself
+
+a) the GNU General Public License as published by the Free
+ Software Foundation; either version 1, or (at your option) any
+ later version, or
+b) the "Artistic License"
+
+--- The GNU General Public License, Version 1, February 1989 ---
+
+This software is Copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software, licensed under:
+
+ The GNU General Public License, Version 1, February 1989
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The license agreements of most software companies try to keep users
+at the mercy of those companies. By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must tell them their rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License. The
+"Program", below, refers to any such program or work, and a "work based
+on the Program" means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications. Each
+licensee is addressed as "you".
+
+ 1. You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program. You may charge a fee for the physical act of
+transferring a copy.
+
+ 2. You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating that
+ you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish, that
+ in whole or in part contains the Program or any part thereof, either
+ with or without modifications, to be licensed at no charge to all
+ third parties under the terms of this General Public License (except
+ that you may choose to grant warranty protection to some or all
+ third parties, at your option).
+
+ c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive use
+ in the simplest and most usual way, to print or display an
+ announcement including an appropriate copyright notice and a notice
+ that there is no warranty (or else, saying that you provide a
+ warranty) and that users may redistribute the program under these
+ conditions, and telling the user how to view a copy of this General
+ Public License.
+
+ d) You may charge a fee for the physical act of transferring a
+ copy, and you may at your option offer warranty protection in
+ exchange for a fee.
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+ 3. You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal charge
+ for the cost of distribution) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+Source code for a work means the preferred form of the work for making
+modifications to it. For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+ 4. You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License. However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+ 5. By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions. You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+ 7. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of the license which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+ 8. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19xx name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ program `Gnomovision' (a program to direct compilers to make passes
+ at assemblers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
+--- The Artistic License 1.0 ---
+
+This software is Copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software, licensed under:
+
+ The Artistic License 1.0
+
+The Artistic License
+
+Preamble
+
+The intent of this document is to state the conditions under which a Package
+may be copied, such that the Copyright Holder maintains some semblance of
+artistic control over the development of the package, while giving the users of
+the package the right to use and distribute the Package in a more-or-less
+customary fashion, plus the right to make reasonable modifications.
+
+Definitions:
+
+ - "Package" refers to the collection of files distributed by the Copyright
+ Holder, and derivatives of that collection of files created through
+ textual modification.
+ - "Standard Version" refers to such a Package if it has not been modified,
+ or has been modified in accordance with the wishes of the Copyright
+ Holder.
+ - "Copyright Holder" is whoever is named in the copyright or copyrights for
+ the package.
+ - "You" is you, if you're thinking about copying or distributing this Package.
+ - "Reasonable copying fee" is whatever you can justify on the basis of media
+ cost, duplication charges, time of people involved, and so on. (You will
+ not be required to justify it to the Copyright Holder, but only to the
+ computing community at large as a market that must bear the fee.)
+ - "Freely Available" means that no fee is charged for the item itself, though
+ there may be fees involved in handling the item. It also means that
+ recipients of the item may redistribute it under the same conditions they
+ received it.
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications derived
+from the Public Domain or from the Copyright Holder. A Package modified in such
+a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided that
+you insert a prominent notice in each changed file stating how and when you
+changed that file, and provided that you do at least ONE of the following:
+
+ a) place your modifications in the Public Domain or otherwise make them
+ Freely Available, such as by posting said modifications to Usenet or an
+ equivalent medium, or placing the modifications on a major archive site
+ such as ftp.uu.net, or by allowing the Copyright Holder to include your
+ modifications in the Standard Version of the Package.
+
+ b) use the modified Package only within your corporation or organization.
+
+ c) rename any non-standard executables so the names do not conflict with
+ standard executables, which must also be provided, and provide a separate
+ manual page for each non-standard executable that clearly documents how it
+ differs from the Standard Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or executable
+form, provided that you do at least ONE of the following:
+
+ a) distribute a Standard Version of the executables and library files,
+ together with instructions (in the manual page or equivalent) on where to
+ get the Standard Version.
+
+ b) accompany the distribution with the machine-readable source of the Package
+ with your modifications.
+
+ c) accompany any non-standard executables with their corresponding Standard
+ Version executables, giving the non-standard executables non-standard
+ names, and clearly documenting the differences in manual pages (or
+ equivalent), together with instructions on where to get the Standard
+ Version.
+
+ d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package. You may charge any fee you choose for support of this Package. You
+may not charge a fee for this Package itself. However, you may distribute this
+Package in aggregate with other (possibly commercial) programs as part of a
+larger (possibly commercial) software distribution provided that you do not
+advertise this Package as a product of your own.
+
+6. The scripts and library files supplied as input to or produced as output
+from the programs of this Package do not automatically fall under the copyright
+of this Package, but belong to whomever generated them, and may be sold
+commercially, and may be aggregated with this Package.
+
+7. C or perl subroutines supplied by you and linked into this Package shall not
+be considered part of this Package.
+
+8. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+The End
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..6cb9a59
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,36 @@
+# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.008.
+CONTRIBUTORS
+Changes
+INSTALL
+LICENSE
+MANIFEST
+META.json
+META.yml
+Makefile.PL
+README.md
+cpanfile
+dist.ini
+examples/sanitize_file.pl
+lib/HTML/Restrict.pm
+perlcriticrc
+perltidyrc
+t/00-load.t
+t/attribute_constraints.t
+t/author-pod-coverage.t
+t/author-pod-spell.t
+t/author-synopsis.t
+t/author-tidyall.t
+t/comments.t
+t/declaration.t
+t/js.t
+t/lowercase.t
+t/memory-leak.t
+t/perlcriticrc
+t/pod.t
+t/release-cpan-changes.t
+t/replace_img.t
+t/scheme.t
+t/stack.t
+t/style.t
+t/xss.t
+tidyall.ini
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..f1fd977
--- /dev/null
+++ b/META.json
@@ -0,0 +1,95 @@
+{
+ "abstract" : "Strip unwanted HTML tags and attributes",
+ "author" : [
+ "Olaf Alders <olaf@wundercounter.com>"
+ ],
+ "dynamic_config" : 0,
+ "generated_by" : "Dist::Zilla version 6.008, CPAN::Meta::Converter version 2.150005",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : 2
+ },
+ "name" : "HTML-Restrict",
+ "no_index" : {
+ "directory" : [
+ "examples",
+ "t",
+ "xt"
+ ]
+ },
+ "prereqs" : {
+ "configure" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0",
+ "perl" : "5.006"
+ }
+ },
+ "develop" : {
+ "requires" : {
+ "Pod::Coverage::TrustPod" : "0",
+ "Test::CPAN::Changes" : "0.19",
+ "Test::Code::TidyAll" : "0.50",
+ "Test::More" : "0.88",
+ "Test::Pod::Coverage" : "1.08",
+ "Test::Spelling" : "0.12",
+ "Test::Synopsis" : "0"
+ }
+ },
+ "runtime" : {
+ "requires" : {
+ "Carp" : "0",
+ "Data::Dump" : "0",
+ "HTML::Entities" : "0",
+ "HTML::Parser" : "0",
+ "List::Util" : "1.33",
+ "Moo" : "1.002000",
+ "Scalar::Util" : "0",
+ "Sub::Quote" : "0",
+ "Types::Standard" : "1.000001",
+ "URI" : "0",
+ "namespace::clean" : "0",
+ "perl" : "5.006",
+ "strict" : "0"
+ }
+ },
+ "test" : {
+ "requires" : {
+ "Test::Fatal" : "0",
+ "Test::More" : "0",
+ "perl" : "5.006",
+ "warnings" : "0"
+ }
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "bugtracker" : {
+ "web" : "https://github.com/oalders/html-restrict/issues"
+ },
+ "homepage" : "https://github.com/oalders/html-restrict",
+ "repository" : {
+ "type" : "git",
+ "url" : "https://github.com/oalders/html-restrict.git",
+ "web" : "https://github.com/oalders/html-restrict"
+ }
+ },
+ "version" : "2.2.4",
+ "x_contributors" : [
+ "Arthur Axel 'fREW' Schmidt <frioux@gmail.com>",
+ "Ben Bullock <benkasminbullock@gmail.com>",
+ "David Golden <dagolden@cpan.org>",
+ "Fitz Elliott <fitz.elliott@gmail.com>",
+ "Graham Knop <haarg@haarg.org>",
+ "Graham TerMarsch <graham@howlingfrog.com>",
+ "Karen Etheridge <ether@cpan.org>",
+ "Mark Jubenville <mark@raybec.com>",
+ "Olaf Alders <olaf@wundersolutions.com>",
+ "perlpong <calyx238@gmail.com>",
+ "skaufman <sam@socialflow.com>"
+ ],
+ "x_serialization_backend" : "Cpanel::JSON::XS version 3.0225"
+}
+
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..e4f5b68
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,56 @@
+---
+abstract: 'Strip unwanted HTML tags and attributes'
+author:
+ - 'Olaf Alders <olaf@wundercounter.com>'
+build_requires:
+ Test::Fatal: '0'
+ Test::More: '0'
+ perl: '5.006'
+ warnings: '0'
+configure_requires:
+ ExtUtils::MakeMaker: '0'
+ perl: '5.006'
+dynamic_config: 0
+generated_by: 'Dist::Zilla version 6.008, CPAN::Meta::Converter version 2.150005'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: '1.4'
+name: HTML-Restrict
+no_index:
+ directory:
+ - examples
+ - t
+ - xt
+requires:
+ Carp: '0'
+ Data::Dump: '0'
+ HTML::Entities: '0'
+ HTML::Parser: '0'
+ List::Util: '1.33'
+ Moo: '1.002000'
+ Scalar::Util: '0'
+ Sub::Quote: '0'
+ Types::Standard: '1.000001'
+ URI: '0'
+ namespace::clean: '0'
+ perl: '5.006'
+ strict: '0'
+resources:
+ bugtracker: https://github.com/oalders/html-restrict/issues
+ homepage: https://github.com/oalders/html-restrict
+ repository: https://github.com/oalders/html-restrict.git
+version: 2.2.4
+x_contributors:
+ - "Arthur Axel 'fREW' Schmidt <frioux@gmail.com>"
+ - 'Ben Bullock <benkasminbullock@gmail.com>'
+ - 'David Golden <dagolden@cpan.org>'
+ - 'Fitz Elliott <fitz.elliott@gmail.com>'
+ - 'Graham Knop <haarg@haarg.org>'
+ - 'Graham TerMarsch <graham@howlingfrog.com>'
+ - 'Karen Etheridge <ether@cpan.org>'
+ - 'Mark Jubenville <mark@raybec.com>'
+ - 'Olaf Alders <olaf@wundersolutions.com>'
+ - 'perlpong <calyx238@gmail.com>'
+ - 'skaufman <sam@socialflow.com>'
+x_serialization_backend: 'YAML::Tiny version 1.69'
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..570d8bf
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,73 @@
+# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.008.
+use strict;
+use warnings;
+
+use 5.006;
+
+use ExtUtils::MakeMaker;
+
+my %WriteMakefileArgs = (
+ "ABSTRACT" => "Strip unwanted HTML tags and attributes",
+ "AUTHOR" => "Olaf Alders <olaf\@wundercounter.com>",
+ "CONFIGURE_REQUIRES" => {
+ "ExtUtils::MakeMaker" => 0
+ },
+ "DISTNAME" => "HTML-Restrict",
+ "LICENSE" => "perl",
+ "MIN_PERL_VERSION" => "5.006",
+ "NAME" => "HTML::Restrict",
+ "PREREQ_PM" => {
+ "Carp" => 0,
+ "Data::Dump" => 0,
+ "HTML::Entities" => 0,
+ "HTML::Parser" => 0,
+ "List::Util" => "1.33",
+ "Moo" => "1.002000",
+ "Scalar::Util" => 0,
+ "Sub::Quote" => 0,
+ "Types::Standard" => "1.000001",
+ "URI" => 0,
+ "namespace::clean" => 0,
+ "strict" => 0
+ },
+ "TEST_REQUIRES" => {
+ "Test::Fatal" => 0,
+ "Test::More" => 0,
+ "warnings" => 0
+ },
+ "VERSION" => "2.2.4",
+ "test" => {
+ "TESTS" => "t/*.t"
+ }
+);
+
+
+my %FallbackPrereqs = (
+ "Carp" => 0,
+ "Data::Dump" => 0,
+ "HTML::Entities" => 0,
+ "HTML::Parser" => 0,
+ "List::Util" => "1.33",
+ "Moo" => "1.002000",
+ "Scalar::Util" => 0,
+ "Sub::Quote" => 0,
+ "Test::Fatal" => 0,
+ "Test::More" => 0,
+ "Types::Standard" => "1.000001",
+ "URI" => 0,
+ "namespace::clean" => 0,
+ "strict" => 0,
+ "warnings" => 0
+);
+
+
+unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
+ delete $WriteMakefileArgs{TEST_REQUIRES};
+ delete $WriteMakefileArgs{BUILD_REQUIRES};
+ $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs;
+}
+
+delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
+ unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
+
+WriteMakefile(%WriteMakefileArgs);
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0796222
--- /dev/null
+++ b/README.md
@@ -0,0 +1,349 @@
+# NAME
+
+HTML::Restrict - Strip unwanted HTML tags and attributes
+
+# VERSION
+
+version 2.2.4
+
+# SYNOPSIS
+
+ use HTML::Restrict;
+
+ my $hr = HTML::Restrict->new();
+
+ # use default rules to start with (strip away all HTML)
+ my $processed = $hr->process(' <b>i am bold</b> ');
+
+ # $processed now equals: 'i am bold'
+
+ # Now, a less restrictive example:
+ use HTML::Restrict;
+
+ my $hr = HTML::Restrict->new(
+ rules => {
+ b => [],
+ img => [qw( src alt / )]
+ }
+ );
+
+ my $html = q[<body><b>hello</b> <img src="pic.jpg" alt="me" id="test" /></body>];
+ my $processed = $hr->process( $html );
+
+ # $processed now equals: <b>hello</b> <img src="pic.jpg" alt="me" />
+
+# DESCRIPTION
+
+This module uses [HTML::Parser](https://metacpan.org/pod/HTML::Parser) to strip HTML from text in a restrictive
+manner. By default all HTML is restricted. You may alter the default
+behaviour by supplying your own tag rules.
+
+# CONSTRUCTOR AND STARTUP
+
+## new()
+
+Creates and returns a new HTML::Restrict object.
+
+ my $hr = HTML::Restrict->new()
+
+HTML::Restrict doesn't require any params to be passed to new. If your goal is
+to remove all HTML from text, then no further setup is required. Just pass
+your text to the process() method and you're done:
+
+ my $plain_text = $hr->process( $html );
+
+If you need to set up specific rules, have a look at the params which
+HTML::Restrict recognizes:
+
+- `rules => \%rules`
+
+ Sets the rules which will be used to process your data. By default all HTML
+ tags are off limits. Use this argument to define the HTML elements and
+ corresponding attributes you'd like to use. Essentially, consider the default
+ behaviour to be:
+
+ rules => {}
+
+ Rules should be passed as a HASHREF of allowed tags. Each hash value should
+ represent the allowed attributes for the listed tag. For example, if you want
+ to allow a fair amount of HTML, you can try something like this:
+
+ my %rules = (
+ a => [qw( href target )],
+ b => [],
+ caption => [],
+ center => [],
+ em => [],
+ i => [],
+ img => [qw( alt border height width src style )],
+ li => [],
+ ol => [],
+ p => [qw(style)],
+ span => [qw(style)],
+ strong => [],
+ sub => [],
+ sup => [],
+ table => [qw( style border cellspacing cellpadding align )],
+ tbody => [],
+ td => [],
+ tr => [],
+ u => [],
+ ul => [],
+ );
+
+ my $hr = HTML::Restrict->new( rules => \%rules )
+
+ Or, to allow only bolded text:
+
+ my $hr = HTML::Restrict->new( rules => { b => [] } );
+
+ Allow bolded text, images and some (but not all) image attributes:
+
+ my %rules = (
+ b => [ ],
+ img => [qw( src alt width height border / )
+ );
+ my $hr = HTML::Restrict->new( rules => \%rules );
+
+ Since [HTML::Parser](https://metacpan.org/pod/HTML::Parser) treats a closing slash as an attribute, you'll need to
+ add "/" to your list of allowed attributes if you'd like your tags to retain
+ closing slashes. For example:
+
+ my $hr = HTML::Restrict->new( rules =>{ hr => [] } );
+ $hr->process( "<hr />"); # returns: <hr>
+
+ my $hr = HTML::Restrict->new( rules =>{ hr => [qw( / )] } );
+ $hr->process( "<hr />"); # returns: <hr />
+
+ HTML::Restrict strips away any tags and attributes which are not explicitly
+ allowed. It also rebuilds your explicitly allowed tags and places their
+ attributes in the order in which they appear in your rules.
+
+ So, if you define the following rules:
+
+ my %rules = (
+ ...
+ img => [qw( src alt title width height id / )]
+ ...
+ );
+
+ then your image tags will all be built like this:
+
+ <img src=".." alt="..." title="..." width="..." height="..." id=".." />
+
+ This gives you greater consistency in your tag layout. If you don't care about
+ element order you don't need to pay any attention to this, but you should be
+ aware that your elements are being reconstructed rather than just stripped
+ down.
+
+ As of 2.1.0, you can also specify a regex to be tested against the attribute
+ value. This feature should be considered experimental for the time being:
+
+ my $hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ qw( width height allowfullscreen ),
+ { src => qr{^http://www\.youtube\.com},
+ frameborder => qr{^(0|1)$},
+ }
+ ],
+ img => [ qw( alt ), { src => qr{^/my/images/} }, ],
+ },
+ );
+
+ my $html = '<img src="http://www.example.com/image.jpg" alt="Alt Text">';
+ my $processed = $hr->process( $html );
+
+ # $processed now equals: <img alt="Alt Text">
+
+- `trim => [0|1]`
+
+ By default all leading and trailing spaces will be removed when text is
+ processed. Set this value to 0 in order to disable this behaviour.
+
+- `uri_schemes => [undef, 'http', 'https', 'irc', ... ]`
+
+ As of version 1.0.3, URI scheme checking is performed on all href and src tag
+ attributes. The following schemes are allowed out of the box. No action is
+ required on your part:
+
+ [ undef, 'http', 'https' ]
+
+ (undef represents relative URIs). These restrictions have been put in place to
+ prevent XSS in the form of:
+
+ <a href="javascript:alert(document.cookie)">click for cookie!</a>
+
+ See [URI](https://metacpan.org/pod/URI) for more detailed info on scheme parsing. If, for example, you
+ wanted to filter out every scheme barring SSL, you would do it like this:
+
+ uri_schemes => ['https']
+
+ This feature is new in 1.0.3. Previous to this, there was no schema checking
+ at all. Moving forward, you'll need to whitelist explicitly all URI schemas
+ which are not supported by default. This is in keeping with the whitelisting
+ behaviour of this module and is also the safest possible approach. Keep in
+ mind that changes to uri\_schemes are not additive, so you'll need to include
+ the defaults in any changes you make, should you wish to keep them:
+
+ # defaults + irc + mailto
+ uri_schemes => [ 'undef', 'http', 'https', 'irc', 'mailto' ]
+
+- allow\_declaration => \[0|1\]
+
+ Set this value to true if you'd like to allow/preserve DOCTYPE declarations in
+ your content. Useful when cleaning up your own static files or templates. This
+ feature is off by default.
+
+ my $html = q[<!doctype html><body>foo</body>];
+
+ my $hr = HTML::Restrict->new( allow_declaration => 1 );
+ $html = $hr->process( $html );
+ # $html is now: "<!doctype html>foo"
+
+- allow\_comments => \[0|1\]
+
+ Set this value to true if you'd like to allow/preserve HTML comments in your
+ content. Useful when cleaning up your own static files or templates. This
+ feature is off by default.
+
+ my $html = q[<body><!-- comments! -->foo</body>];
+
+ my $hr = HTML::Restrict->new( allow_comments => 1 );
+ $html = $hr->process( $html );
+ # $html is now: "<!-- comments! -->foo"
+
+- replace\_img => \[0|1|CodeRef\]
+
+ Set the value to true if you'd like to have img tags replaced with
+ `[IMAGE: ...]` containing the alt attribute text. If you set it to a
+ code reference, you can provide your own replacement (which may
+ even contain HTML).
+
+ sub replacer {
+ my ($tagname, $attr, $text) = @_; # from HTML::Parser
+ return qq{<a href="$attr->{src}">IMAGE: $attr->{alt}</a>};
+ }
+
+ my $hr = HTML::Restrict->new( replace_img => \&replacer );
+
+ This attribute will only take effect if the img tag is not included
+ in the allowed HTML.
+
+- strip\_enclosed\_content => \[0|1\]
+
+ The default behaviour up to 1.0.4 was to preserve the content between script
+ and style tags, even when the tags themselves were being deleted. So, you'd be
+ left with a bunch of JavaScript or CSS, just with the enclosing tags missing.
+ This is almost never what you want, so starting at 1.0.5 the default will be to
+ remove any script or style info which is enclosed in these tags, unless they
+ have specifically been whitelisted in the rules. This will be a sane default
+ when cleaning up content submitted via a web form. However, if you're using
+ HTML::Restrict to purge your own HTML you can be more restrictive.
+
+ # strip the head section, in addition to JS and CSS
+ my $html = '<html><head>...</head><body>...<script>JS here</script>foo';
+
+ my $hr = HTML::Restrict->new(
+ strip_enclosed_content => [ 'script', 'style', 'head' ]
+ );
+
+ $html = $hr->process( $html );
+ # $html is now '<html><body>...foo';
+
+ The caveat here is that HTML::Restrict will not try to fix broken HTML. In the
+ above example, if you have any opening script, style or head tags which don't
+ also include matching closing tags, all following content will be stripped
+ away, regardless of any parent tags.
+
+ Keep in mind that changes to strip\_enclosed\_content are not additive, so if you
+ are adding additional tags you'll need to include the entire list of tags whose
+ enclosed content you'd like to remove. This feature strips script and style
+ tags by default.
+
+# SUBROUTINES/METHODS
+
+## process( $html )
+
+This is the method which does the real work. It parses your data, removes any
+tags and attributes which are not specifically allowed and returns the
+resulting text. Requires and returns a SCALAR.
+
+## get\_rules
+
+Accessor which returns a hash ref of the current rule set.
+
+## get\_uri\_schemes
+
+Accessor which returns an array ref of the current valid uri schemes.
+
+# CAVEATS
+
+Please note that all tag and attribute names passed via the rules param must be
+supplied in lower case.
+
+ # correct
+ my $hr = HTML::Restrict->new( rules => { body => ['onload'] } );
+
+ # throws a fatal error
+ my $hr = HTML::Restrict->new( rules => { Body => ['onLoad'] } );
+
+# MOTIVATION
+
+There are already several modules on the CPAN which accomplish much of the same
+thing, but after doing a lot of poking around, I was unable to find a solution
+with a simple setup which I was happy with.
+
+The most common use case might be stripping HTML from user submitted data
+completely or allowing just a few tags and attributes to be displayed. With
+the exception of URI scheme checking, this module doesn't do any validation on
+the actual content of the tags or attributes. If this is a requirement, you
+can either mess with the parser object, post-process the text yourself or have
+a look at one of the more feature-rich modules in the SEE ALSO section below.
+
+My aim here is to keep things easy and, hopefully, cover a lot of the less
+complex use cases with just a few lines of code and some brief documentation.
+The idea is to be up and running quickly.
+
+# SEE ALSO
+
+[HTML::TagFilter](https://metacpan.org/pod/HTML::TagFilter), [HTML::Defang](https://metacpan.org/pod/HTML::Defang), [MojoMojo::Declaw](https://metacpan.org/pod/MojoMojo::Declaw), [HTML::StripScripts](https://metacpan.org/pod/HTML::StripScripts),
+[HTML::Detoxifier](https://metacpan.org/pod/HTML::Detoxifier), HTML::Sanitizer, [HTML::Scrubber](https://metacpan.org/pod/HTML::Scrubber)
+
+# ACKNOWLEDGEMENTS
+
+Thanks to Raybec Communications [http://www.raybec.com](http://www.raybec.com) for funding my
+work on this module and for releasing it to the world.
+
+Thanks also to the following for patches, bug reports and assistance:
+
+Mark Jubenville (ioncache)
+
+Duncan Forsyth
+
+Rick Moore
+
+Arthur Axel 'fREW' Schmidt
+
+perlpong
+
+David Golden
+
+Graham TerMarsch
+
+Dagfinn Ilmari Mannsåker
+
+Graham Knop
+
+Carwyn Ellis
+
+# AUTHOR
+
+Olaf Alders <olaf@wundercounter.com>
+
+# COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
diff --git a/cpanfile b/cpanfile
new file mode 100644
index 0000000..5b011ff
--- /dev/null
+++ b/cpanfile
@@ -0,0 +1,35 @@
+requires "Carp" => "0";
+requires "Data::Dump" => "0";
+requires "HTML::Entities" => "0";
+requires "HTML::Parser" => "0";
+requires "List::Util" => "1.33";
+requires "Moo" => "1.002000";
+requires "Scalar::Util" => "0";
+requires "Sub::Quote" => "0";
+requires "Types::Standard" => "1.000001";
+requires "URI" => "0";
+requires "namespace::clean" => "0";
+requires "perl" => "5.006";
+requires "strict" => "0";
+
+on 'test' => sub {
+ requires "Test::Fatal" => "0";
+ requires "Test::More" => "0";
+ requires "perl" => "5.006";
+ requires "warnings" => "0";
+};
+
+on 'configure' => sub {
+ requires "ExtUtils::MakeMaker" => "0";
+ requires "perl" => "5.006";
+};
+
+on 'develop' => sub {
+ requires "Pod::Coverage::TrustPod" => "0";
+ requires "Test::CPAN::Changes" => "0.19";
+ requires "Test::Code::TidyAll" => "0.50";
+ requires "Test::More" => "0.88";
+ requires "Test::Pod::Coverage" => "1.08";
+ requires "Test::Spelling" => "0.12";
+ requires "Test::Synopsis" => "0";
+};
diff --git a/dist.ini b/dist.ini
new file mode 100644
index 0000000..bc62ed5
--- /dev/null
+++ b/dist.ini
@@ -0,0 +1,9 @@
+name = HTML-Restrict
+author = Olaf Alders <olaf@wundercounter.com>
+license = Perl_5
+copyright_holder = Olaf Alders
+copyright_year = 2013-2017
+version = 2.2.4
+main_module = lib/HTML/Restrict.pm
+
+[@Author::OALDERS]
diff --git a/examples/sanitize_file.pl b/examples/sanitize_file.pl
new file mode 100755
index 0000000..c2c3e31
--- /dev/null
+++ b/examples/sanitize_file.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use File::Slurp;
+use HTML::Restrict;
+
+my $filename = shift @ARGV;
+die "usage: perl $0 path/to/file > path/to/new/file" if !$filename;
+
+my $text = read_file($filename);
+
+my $hr = HTML::Restrict->new;
+print $hr->process($text);
+
+=pod
+
+=head1 SYNOPSIS
+
+ perl sanitize_file path/to/filename > path/to/sanitized/file
+
+=cut
diff --git a/lib/HTML/Restrict.pm b/lib/HTML/Restrict.pm
new file mode 100644
index 0000000..bb299e4
--- /dev/null
+++ b/lib/HTML/Restrict.pm
@@ -0,0 +1,686 @@
+use strict;
+use 5.006;
+
+package HTML::Restrict;
+$HTML::Restrict::VERSION = '2.2.4';
+use Carp qw( croak );
+use Data::Dump qw( dump );
+use HTML::Parser;
+use HTML::Entities qw( encode_entities );
+use Types::Standard 1.000001 qw[ Bool HashRef ArrayRef CodeRef ];
+use List::Util 1.33 qw( any none );
+use Scalar::Util qw( reftype weaken );
+use Sub::Quote 'quote_sub';
+use URI;
+
+use Moo 1.002000;
+use namespace::clean;
+
+has 'allow_comments' => (
+ is => 'rw',
+ isa => Bool,
+ default => 0,
+);
+
+has 'allow_declaration' => (
+ is => 'rw',
+ isa => Bool,
+ default => 0,
+);
+
+has 'debug' => (
+ is => 'rw',
+ isa => Bool,
+ default => 0,
+);
+
+has 'parser' => (
+ is => 'ro',
+ lazy => 1,
+ builder => '_build_parser',
+);
+
+has 'rules' => (
+ is => 'rw',
+ isa => HashRef,
+ required => 0,
+ default => quote_sub(q{ {} }),
+ trigger => \&_build_parser,
+ reader => 'get_rules',
+ writer => 'set_rules',
+);
+
+has 'strip_enclosed_content' => (
+ is => 'rw',
+ isa => ArrayRef,
+ default => sub { [ 'script', 'style' ] },
+);
+
+has 'replace_img' => (
+ is => 'rw',
+ isa => Bool | CodeRef,
+ default => 0,
+);
+
+has 'trim' => (
+ is => 'rw',
+ isa => Bool,
+ default => 1,
+);
+
+has 'uri_schemes' => (
+ is => 'rw',
+ isa => ArrayRef,
+ required => 0,
+ default => sub { [ undef, 'http', 'https' ] },
+ reader => 'get_uri_schemes',
+ writer => 'set_uri_schemes',
+);
+
+has '_processed' => (
+ is => 'rw',
+ isa => quote_sub(
+ q{
+ die "$_[0] is not false or a string!"
+ unless !defined($_[0]) || $_[0] eq "" || "$_[0]" eq '0' || ref(\$_[0]) eq 'SCALAR'
+ }
+ ),
+ clearer => '_clear_processed',
+);
+
+has '_stripper_stack' => (
+ is => 'rw',
+ isa => ArrayRef,
+ default => sub { [] },
+);
+
+sub _build_parser {
+
+ my $self = shift;
+ my $rules = shift;
+
+ # don't allow any upper case tag or attribute names
+ # these rules would otherwise silently be ignored
+ if ($rules) {
+ foreach my $tag_name ( keys %{$rules} ) {
+ if ( lc $tag_name ne $tag_name ) {
+ croak "All tag names must be lower cased";
+ }
+ if ( reftype $rules->{$tag_name} eq 'ARRAY' ) {
+ my @attr_names;
+ foreach my $attr_item ( @{ $rules->{$tag_name} } ) {
+ ref $attr_item eq 'HASH'
+ ? push( @attr_names, keys(%$attr_item) )
+ : push( @attr_names, $attr_item );
+ }
+ for (@attr_names) {
+ croak "All attribute names must be lower cased"
+ if lc $_ ne $_;
+ }
+ }
+ }
+ }
+
+ weaken($self);
+ return HTML::Parser->new(
+
+ start_h => [
+ sub {
+ my ( $p, $tagname, $attr, $text ) = @_;
+ print "starting tag: $tagname", "\n" if $self->debug;
+ my $more = q{};
+
+ if ( any { $_ eq $tagname } keys %{ $self->get_rules } ) {
+ print dump $attr if $self->debug;
+
+ foreach my $source_type ( 'href', 'src', 'cite' ) {
+
+ if ( $attr->{$source_type} ) {
+ my $uri = URI->new( $attr->{$source_type} );
+ if ( defined $uri->scheme ) {
+ delete $attr->{$source_type}
+ if none { $_ eq $uri->scheme }
+ grep defined, @{ $self->get_uri_schemes };
+ }
+ else { # relative uri
+ delete $attr->{$source_type}
+ unless grep !defined,
+ @{ $self->get_uri_schemes };
+ }
+ }
+ }
+
+ foreach
+ my $attr_item ( @{ $self->get_rules->{$tagname} } ) {
+ if ( ref $attr_item eq 'HASH' ) {
+
+ # validate against regex contraints
+ for my $attr_name ( sort keys %$attr_item ) {
+ if ( exists $attr->{$attr_name} ) {
+ my $value = encode_entities(
+ $attr->{$attr_name} );
+ $more .= qq[ $attr_name="$value" ]
+ if $attr->{$attr_name}
+ =~ $attr_item->{$attr_name};
+ }
+ }
+ }
+ else {
+ my $attr_name = $attr_item;
+ if ( exists $attr->{$attr_name} ) {
+ my $value
+ = encode_entities( $attr->{$attr_name} );
+ $more .= qq[ $attr_name="$value" ]
+ unless $attr_name eq q{/};
+ }
+ }
+ }
+
+ # closing slash should (naturally) close the tag
+ if ( exists $attr->{q{/}} && $attr->{q{/}} eq q{/} ) {
+ $more .= ' /';
+ }
+
+ my $elem = "<$tagname $more>";
+ $elem =~ s{\s*>}{>}gxms;
+ $elem =~ s{\s+}{ }gxms;
+
+ $self->_processed( ( $self->_processed || q{} ) . $elem );
+ }
+ elsif ( $tagname eq 'img' && $self->replace_img ) {
+ my $alt;
+ if ( ref $self->replace_img ) {
+ $alt = $self->replace_img->( $tagname, $attr, $text );
+ }
+ else {
+ $alt
+ = defined( $attr->{alt} ) ? ": $attr->{alt}" : "";
+ $alt = "[IMAGE$alt]";
+ }
+ $self->_processed( ( $self->_processed || q{} ) . $alt );
+ }
+ elsif (
+ any { $_ eq $tagname }
+ @{ $self->strip_enclosed_content }
+ ) {
+ print "adding $tagname to strippers" if $self->debug;
+ push @{ $self->_stripper_stack }, $tagname;
+ }
+
+ },
+ "self,tagname,attr,text"
+ ],
+
+ end_h => [
+ sub {
+ my ( $p, $tagname, $attr, $text ) = @_;
+ print "end: $text\n" if $self->debug;
+ if ( any { $_ eq $tagname } keys %{ $self->get_rules } ) {
+ $self->_processed( ( $self->_processed || q{} ) . $text );
+ }
+ elsif ( any { $_ eq $tagname } @{ $self->_stripper_stack } ) {
+ $self->_delete_tag_from_stack($tagname);
+ }
+
+ },
+ "self,tagname,attr,text"
+ ],
+
+ text_h => [
+ sub {
+ my ( $p, $text ) = @_;
+ print "text: $text\n" if $self->debug;
+ if ( !@{ $self->_stripper_stack } ) {
+ $self->_processed( ( $self->_processed || q{} ) . $text );
+ }
+ },
+ "self,text"
+ ],
+
+ comment_h => [
+ sub {
+ my ( $p, $text ) = @_;
+ print "comment: $text\n" if $self->debug;
+ if ( $self->allow_comments ) {
+ $self->_processed( ( $self->_processed || q{} ) . $text );
+ }
+ },
+ "self,text"
+ ],
+
+ declaration_h => [
+ sub {
+ my ( $p, $text ) = @_;
+ print "declaration: $text\n" if $self->debug;
+ if ( $self->allow_declaration ) {
+ $self->_processed( ( $self->_processed || q{} ) . $text );
+ }
+ },
+ "self,text"
+ ],
+
+ );
+
+}
+
+sub process {
+
+ my $self = shift;
+
+ # returns undef if no value was passed
+ return if !@_;
+ return $_[0] if !$_[0];
+
+ my ($content) = @_;
+ die 'content must be a string!'
+ unless ref( \$content ) eq 'SCALAR';
+ $self->_clear_processed;
+
+ my $parser = $self->parser;
+ $parser->parse($content);
+ $parser->eof;
+
+ my $text = $self->_processed;
+
+ if ( $self->trim && $text ) {
+ $text =~ s{\A\s*}{}gxms;
+ $text =~ s{\s*\z}{}gxms;
+ }
+ $self->_processed($text);
+
+ # ensure stripper stack is reset in case of broken html
+ $self->_stripper_stack( [] );
+
+ return $self->_processed;
+
+}
+
+# strip_enclosed_content tags could be nested in the source HTML, so we
+# maintain a stack of these tags.
+
+sub _delete_tag_from_stack {
+
+ my $self = shift;
+ my $closing_tag = shift;
+
+ my $found = 0;
+ my @tag_list = ();
+
+ foreach my $tag ( reverse @{ $self->_stripper_stack } ) {
+ if ( $tag eq $closing_tag && $found == 0 ) {
+ $found = 1;
+ next;
+ }
+ push @tag_list, $tag;
+ }
+
+ $self->_stripper_stack( [ reverse @tag_list ] );
+
+ return;
+}
+
+1; # End of HTML::Restrict
+
+# ABSTRACT: Strip unwanted HTML tags and attributes
+
+__END__
+
+=pod
+
+=encoding UTF-8
+
+=head1 NAME
+
+HTML::Restrict - Strip unwanted HTML tags and attributes
+
+=head1 VERSION
+
+version 2.2.4
+
+=head1 SYNOPSIS
+
+ use HTML::Restrict;
+
+ my $hr = HTML::Restrict->new();
+
+ # use default rules to start with (strip away all HTML)
+ my $processed = $hr->process(' <b>i am bold</b> ');
+
+ # $processed now equals: 'i am bold'
+
+ # Now, a less restrictive example:
+ use HTML::Restrict;
+
+ my $hr = HTML::Restrict->new(
+ rules => {
+ b => [],
+ img => [qw( src alt / )]
+ }
+ );
+
+ my $html = q[<body><b>hello</b> <img src="pic.jpg" alt="me" id="test" /></body>];
+ my $processed = $hr->process( $html );
+
+ # $processed now equals: <b>hello</b> <img src="pic.jpg" alt="me" />
+
+=head1 DESCRIPTION
+
+This module uses L<HTML::Parser> to strip HTML from text in a restrictive
+manner. By default all HTML is restricted. You may alter the default
+behaviour by supplying your own tag rules.
+
+=head1 CONSTRUCTOR AND STARTUP
+
+=head2 new()
+
+Creates and returns a new HTML::Restrict object.
+
+ my $hr = HTML::Restrict->new()
+
+HTML::Restrict doesn't require any params to be passed to new. If your goal is
+to remove all HTML from text, then no further setup is required. Just pass
+your text to the process() method and you're done:
+
+ my $plain_text = $hr->process( $html );
+
+If you need to set up specific rules, have a look at the params which
+HTML::Restrict recognizes:
+
+=over 4
+
+=item * C<< rules => \%rules >>
+
+Sets the rules which will be used to process your data. By default all HTML
+tags are off limits. Use this argument to define the HTML elements and
+corresponding attributes you'd like to use. Essentially, consider the default
+behaviour to be:
+
+ rules => {}
+
+Rules should be passed as a HASHREF of allowed tags. Each hash value should
+represent the allowed attributes for the listed tag. For example, if you want
+to allow a fair amount of HTML, you can try something like this:
+
+ my %rules = (
+ a => [qw( href target )],
+ b => [],
+ caption => [],
+ center => [],
+ em => [],
+ i => [],
+ img => [qw( alt border height width src style )],
+ li => [],
+ ol => [],
+ p => [qw(style)],
+ span => [qw(style)],
+ strong => [],
+ sub => [],
+ sup => [],
+ table => [qw( style border cellspacing cellpadding align )],
+ tbody => [],
+ td => [],
+ tr => [],
+ u => [],
+ ul => [],
+ );
+
+ my $hr = HTML::Restrict->new( rules => \%rules )
+
+Or, to allow only bolded text:
+
+ my $hr = HTML::Restrict->new( rules => { b => [] } );
+
+Allow bolded text, images and some (but not all) image attributes:
+
+ my %rules = (
+ b => [ ],
+ img => [qw( src alt width height border / )
+ );
+ my $hr = HTML::Restrict->new( rules => \%rules );
+
+Since L<HTML::Parser> treats a closing slash as an attribute, you'll need to
+add "/" to your list of allowed attributes if you'd like your tags to retain
+closing slashes. For example:
+
+ my $hr = HTML::Restrict->new( rules =>{ hr => [] } );
+ $hr->process( "<hr />"); # returns: <hr>
+
+ my $hr = HTML::Restrict->new( rules =>{ hr => [qw( / )] } );
+ $hr->process( "<hr />"); # returns: <hr />
+
+HTML::Restrict strips away any tags and attributes which are not explicitly
+allowed. It also rebuilds your explicitly allowed tags and places their
+attributes in the order in which they appear in your rules.
+
+So, if you define the following rules:
+
+ my %rules = (
+ ...
+ img => [qw( src alt title width height id / )]
+ ...
+ );
+
+then your image tags will all be built like this:
+
+ <img src=".." alt="..." title="..." width="..." height="..." id=".." />
+
+This gives you greater consistency in your tag layout. If you don't care about
+element order you don't need to pay any attention to this, but you should be
+aware that your elements are being reconstructed rather than just stripped
+down.
+
+As of 2.1.0, you can also specify a regex to be tested against the attribute
+value. This feature should be considered experimental for the time being:
+
+ my $hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ qw( width height allowfullscreen ),
+ { src => qr{^http://www\.youtube\.com},
+ frameborder => qr{^(0|1)$},
+ }
+ ],
+ img => [ qw( alt ), { src => qr{^/my/images/} }, ],
+ },
+ );
+
+ my $html = '<img src="http://www.example.com/image.jpg" alt="Alt Text">';
+ my $processed = $hr->process( $html );
+
+ # $processed now equals: <img alt="Alt Text">
+
+=item * C<< trim => [0|1] >>
+
+By default all leading and trailing spaces will be removed when text is
+processed. Set this value to 0 in order to disable this behaviour.
+
+=item * C<< uri_schemes => [undef, 'http', 'https', 'irc', ... ] >>
+
+As of version 1.0.3, URI scheme checking is performed on all href and src tag
+attributes. The following schemes are allowed out of the box. No action is
+required on your part:
+
+ [ undef, 'http', 'https' ]
+
+(undef represents relative URIs). These restrictions have been put in place to
+prevent XSS in the form of:
+
+ <a href="javascript:alert(document.cookie)">click for cookie!</a>
+
+See L<URI> for more detailed info on scheme parsing. If, for example, you
+wanted to filter out every scheme barring SSL, you would do it like this:
+
+ uri_schemes => ['https']
+
+This feature is new in 1.0.3. Previous to this, there was no schema checking
+at all. Moving forward, you'll need to whitelist explicitly all URI schemas
+which are not supported by default. This is in keeping with the whitelisting
+behaviour of this module and is also the safest possible approach. Keep in
+mind that changes to uri_schemes are not additive, so you'll need to include
+the defaults in any changes you make, should you wish to keep them:
+
+ # defaults + irc + mailto
+ uri_schemes => [ 'undef', 'http', 'https', 'irc', 'mailto' ]
+
+=item * allow_declaration => [0|1]
+
+Set this value to true if you'd like to allow/preserve DOCTYPE declarations in
+your content. Useful when cleaning up your own static files or templates. This
+feature is off by default.
+
+ my $html = q[<!doctype html><body>foo</body>];
+
+ my $hr = HTML::Restrict->new( allow_declaration => 1 );
+ $html = $hr->process( $html );
+ # $html is now: "<!doctype html>foo"
+
+=item * allow_comments => [0|1]
+
+Set this value to true if you'd like to allow/preserve HTML comments in your
+content. Useful when cleaning up your own static files or templates. This
+feature is off by default.
+
+ my $html = q[<body><!-- comments! -->foo</body>];
+
+ my $hr = HTML::Restrict->new( allow_comments => 1 );
+ $html = $hr->process( $html );
+ # $html is now: "<!-- comments! -->foo"
+
+=item * replace_img => [0|1|CodeRef]
+
+Set the value to true if you'd like to have img tags replaced with
+C<[IMAGE: ...]> containing the alt attribute text. If you set it to a
+code reference, you can provide your own replacement (which may
+even contain HTML).
+
+ sub replacer {
+ my ($tagname, $attr, $text) = @_; # from HTML::Parser
+ return qq{<a href="$attr->{src}">IMAGE: $attr->{alt}</a>};
+ }
+
+ my $hr = HTML::Restrict->new( replace_img => \&replacer );
+
+This attribute will only take effect if the img tag is not included
+in the allowed HTML.
+
+=item * strip_enclosed_content => [0|1]
+
+The default behaviour up to 1.0.4 was to preserve the content between script
+and style tags, even when the tags themselves were being deleted. So, you'd be
+left with a bunch of JavaScript or CSS, just with the enclosing tags missing.
+This is almost never what you want, so starting at 1.0.5 the default will be to
+remove any script or style info which is enclosed in these tags, unless they
+have specifically been whitelisted in the rules. This will be a sane default
+when cleaning up content submitted via a web form. However, if you're using
+HTML::Restrict to purge your own HTML you can be more restrictive.
+
+ # strip the head section, in addition to JS and CSS
+ my $html = '<html><head>...</head><body>...<script>JS here</script>foo';
+
+ my $hr = HTML::Restrict->new(
+ strip_enclosed_content => [ 'script', 'style', 'head' ]
+ );
+
+ $html = $hr->process( $html );
+ # $html is now '<html><body>...foo';
+
+The caveat here is that HTML::Restrict will not try to fix broken HTML. In the
+above example, if you have any opening script, style or head tags which don't
+also include matching closing tags, all following content will be stripped
+away, regardless of any parent tags.
+
+Keep in mind that changes to strip_enclosed_content are not additive, so if you
+are adding additional tags you'll need to include the entire list of tags whose
+enclosed content you'd like to remove. This feature strips script and style
+tags by default.
+
+=back
+
+=head1 SUBROUTINES/METHODS
+
+=head2 process( $html )
+
+This is the method which does the real work. It parses your data, removes any
+tags and attributes which are not specifically allowed and returns the
+resulting text. Requires and returns a SCALAR.
+
+=head2 get_rules
+
+Accessor which returns a hash ref of the current rule set.
+
+=head2 get_uri_schemes
+
+Accessor which returns an array ref of the current valid uri schemes.
+
+=head1 CAVEATS
+
+Please note that all tag and attribute names passed via the rules param must be
+supplied in lower case.
+
+ # correct
+ my $hr = HTML::Restrict->new( rules => { body => ['onload'] } );
+
+ # throws a fatal error
+ my $hr = HTML::Restrict->new( rules => { Body => ['onLoad'] } );
+
+=head1 MOTIVATION
+
+There are already several modules on the CPAN which accomplish much of the same
+thing, but after doing a lot of poking around, I was unable to find a solution
+with a simple setup which I was happy with.
+
+The most common use case might be stripping HTML from user submitted data
+completely or allowing just a few tags and attributes to be displayed. With
+the exception of URI scheme checking, this module doesn't do any validation on
+the actual content of the tags or attributes. If this is a requirement, you
+can either mess with the parser object, post-process the text yourself or have
+a look at one of the more feature-rich modules in the SEE ALSO section below.
+
+My aim here is to keep things easy and, hopefully, cover a lot of the less
+complex use cases with just a few lines of code and some brief documentation.
+The idea is to be up and running quickly.
+
+=head1 SEE ALSO
+
+L<HTML::TagFilter>, L<HTML::Defang>, L<MojoMojo::Declaw>, L<HTML::StripScripts>,
+L<HTML::Detoxifier>, HTML::Sanitizer, L<HTML::Scrubber>
+
+=head1 ACKNOWLEDGEMENTS
+
+Thanks to Raybec Communications L<http://www.raybec.com> for funding my
+work on this module and for releasing it to the world.
+
+Thanks also to the following for patches, bug reports and assistance:
+
+Mark Jubenville (ioncache)
+
+Duncan Forsyth
+
+Rick Moore
+
+Arthur Axel 'fREW' Schmidt
+
+perlpong
+
+David Golden
+
+Graham TerMarsch
+
+Dagfinn Ilmari Mannsåker
+
+Graham Knop
+
+Carwyn Ellis
+
+=head1 AUTHOR
+
+Olaf Alders <olaf@wundercounter.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/perlcriticrc b/perlcriticrc
new file mode 100644
index 0000000..7819a28
--- /dev/null
+++ b/perlcriticrc
@@ -0,0 +1,86 @@
+severity = 3
+verbose = 11
+
+theme = core + pbp + bugs + maintenance + cosmetic + complexity + security + tests + moose
+
+exclude = Subroutines::ProhibitCallsToUndeclaredSubs
+
+[BuiltinFunctions::ProhibitStringySplit]
+severity = 3
+
+[CodeLayout::RequireTrailingCommas]
+severity = 3
+
+[ControlStructures::ProhibitCStyleForLoops]
+severity = 3
+
+[InputOutput::RequireCheckedSyscalls]
+functions = :builtins
+exclude_functions = sleep
+severity = 3
+
+[Moose::RequireCleanNamespace]
+modules = Moose Moose::Role MooseX::Role::Parameterized Moose::Util::TypeConstraints
+cleaners = namespace::autoclean
+
+[NamingConventions::Capitalization]
+package_exemptions = [A-Z]\w+|minFraud
+file_lexical_variables = [A-Z]\w+|[^A-Z]+
+global_variables = :starts_with_upper
+scoped_lexical_variables = [A-Z]\w+|[^A-Z]+
+severity = 3
+
+# Given our code base, leaving this at 5 would be a huge pain
+[Subroutines::ProhibitManyArgs]
+max_arguments = 10
+
+[RegularExpressions::ProhibitComplexRegexes]
+max_characters = 200
+
+[RegularExpressions::ProhibitUnusualDelimiters]
+severity = 3
+
+[Subroutines::ProhibitUnusedPrivateSubroutines]
+private_name_regex = _(?!build)\w+
+skip_when_using = Moo::Role Moose::Role MooseX::Role::Parameterized Role::Tiny Test::Class::Moose::Role
+
+[TestingAndDebugging::ProhibitNoWarnings]
+allow = redefine
+
+[ValuesAndExpressions::ProhibitEmptyQuotes]
+severity = 3
+
+[ValuesAndExpressions::ProhibitInterpolationOfLiterals]
+severity = 3
+
+[ValuesAndExpressions::RequireUpperCaseHeredocTerminator]
+severity = 3
+
+[Variables::ProhibitPackageVars]
+add_packages = Test::Builder
+
+[TestingAndDebugging::RequireUseStrict]
+
+[TestingAndDebugging::RequireUseWarnings]
+
+[-ControlStructures::ProhibitCascadingIfElse]
+
+[-ErrorHandling::RequireCarping]
+[-InputOutput::RequireBriefOpen]
+
+[-ValuesAndExpressions::ProhibitConstantPragma]
+
+# No need for /xsm everywhere
+[-RegularExpressions::RequireDotMatchAnything]
+[-RegularExpressions::RequireExtendedFormatting]
+[-RegularExpressions::RequireLineBoundaryMatching]
+
+[-Subroutines::ProhibitExplicitReturnUndef]
+
+# http://stackoverflow.com/questions/2275317/why-does-perlcritic-dislike-using-shift-to-populate-subroutine-variables
+[-Subroutines::RequireArgUnpacking]
+
+[-Subroutines::RequireFinalReturn]
+
+# "use v5.14" is more readable than "use 5.014"
+[-ValuesAndExpressions::ProhibitVersionStrings]
diff --git a/perltidyrc b/perltidyrc
new file mode 100644
index 0000000..b7ed624
--- /dev/null
+++ b/perltidyrc
@@ -0,0 +1,12 @@
+--blank-lines-before-packages=0
+--iterations=2
+--no-outdent-long-comments
+-b
+-bar
+-boc
+-ci=4
+-i=4
+-l=78
+-nolq
+-se
+-wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
diff --git a/t/00-load.t b/t/00-load.t
new file mode 100644
index 0000000..1c1421c
--- /dev/null
+++ b/t/00-load.t
@@ -0,0 +1,125 @@
+#!perl
+
+use Test::More;
+
+use strict;
+use warnings;
+
+use Data::Dump;
+use HTML::Restrict;
+use Scalar::Util;
+
+my $version = $HTML::Restrict::VERSION || 'development';
+diag( "Testing HTML::Restrict $version, Perl $], $^X" );
+
+my $hr = HTML::Restrict->new( debug => 0 );
+isa_ok( $hr, 'HTML::Restrict' );
+
+isa_ok( $hr->parser, 'HTML::Parser' );
+
+my $default_rules = $hr->get_rules;
+
+cmp_ok( Scalar::Util::reftype( $default_rules ),
+ 'eq', 'HASH', "default rules are empty" );
+
+# basic stripping -- all tags strippped
+my $bold = '<b>i am bold</b>';
+my $processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', 'i am bold', "b tag stripped" );
+
+# updating rules
+my $b_rules = { b => [] };
+$hr->set_rules( $b_rules );
+my $updated_rules = $hr->get_rules;
+is_deeply( $b_rules, $updated_rules, "rules update correctly" );
+
+# ensure allowed tags aren't stripped
+$processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', $bold, "b tag not stripped" );
+
+# more complex set with multiple tags
+# ensure allowed tags aren't stripped and others are removed
+$hr->set_rules( { a => [qw( href target )] } );
+my $link
+ = q[<center><a href="http://google.com" target="_blank" id="test">google</a></center>];
+my $processed_link = $hr->process( $link );
+cmp_ok(
+ $processed_link, 'eq',
+ q[<a href="http://google.com" target="_blank">google</a>],
+ "allowed link but not center tag",
+);
+
+# ensure closing slash is maintained for tags
+# with no end tag
+$hr->set_rules( { img => [qw( src width height /)] } );
+my $img = q[<body><img src="/face.jpg" width="10" height="10" /></body>];
+my $processed_img = $hr->process( $img );
+cmp_ok(
+ $processed_img, 'eq',
+ '<img src="/face.jpg" width="10" height="10" />',
+ "closing slash preserved in image"
+);
+
+# rest rules to default set
+$hr->set_rules( {} );
+cmp_ok( $hr->process( $bold ), 'eq', 'i am bold', "back to default rules" );
+
+# stripping of comments
+cmp_ok( $hr->process( "<!-- comment this -->ok" ),
+ 'eq', 'ok', "comments are stripped" );
+
+# stripping of javascript includes
+cmp_ok(
+ $hr->process(
+ q{<script type="text/javascript" src="/js/jquery-1.3.2.js"></script>ok}
+ ),
+ 'eq', 'ok',
+ "javascript includes are stripped"
+);
+
+# stripping of css includes
+cmp_ok(
+ $hr->process(
+ q{<link href="/style.css" media="screen" rel="stylesheet" type="text/css" />ok}
+ ),
+ 'eq', 'ok',
+ "css includes are stripped"
+);
+
+ok( $hr->trim, "trim enabled by default" );
+
+# stripping of leading and trailing spaces
+cmp_ok( $hr->process( " ok ok ok " ),
+ 'eq', 'ok ok ok', "leading and trailing spaces trimmed" );
+
+# stripping of div tags
+cmp_ok( $hr->process( "<div>ok</div>" ),
+ 'eq', 'ok', "divs are stripped away" );
+
+# undef should be returned when no value is passed to the process method
+is( $hr->process(), undef, "undef is returned when no value passed" );
+
+
+# start fresh
+# RT #55775
+$hr = HTML::Restrict->new;
+
+cmp_ok( $hr->process( 0 ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '0' ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '000' ), 'eq', '000', "untrue values not processed");
+
+ok( !$hr->process("<html>"), "process only HTML" );
+
+
+# bugfix: check URI scheme for "src" attributes
+$hr = HTML::Restrict->new( rules => { img => [qw( src )] } );
+$hr->set_uri_schemes( [ undef, 'http', 'https' ] );
+cmp_ok( $hr->process('<img src="file:/some/file">'), 'eq', '<img>' );
+
+# bugfix: ensure stripper stack is reset in case of broken html
+$hr = HTML::Restrict->new;
+$hr->strip_enclosed_content( ['script'] );
+$hr->process('<script < b >');
+cmp_ok($hr->process('some text'), 'eq', 'some text', "stripper stack reset");
+
+done_testing();
diff --git a/t/attribute_constraints.t b/t/attribute_constraints.t
new file mode 100644
index 0000000..1f58bc8
--- /dev/null
+++ b/t/attribute_constraints.t
@@ -0,0 +1,66 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ qw( width height ),
+ {
+ src => qr{^http://www\.youtube\.com},
+ frameborder => qr{^(0|1)$},
+ }
+ ],
+ },
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>',
+ 'all constraints pass',
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="0"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315" frameborder="0"></iframe>',
+ 'one constraint fails',
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="A"></iframe>'
+ ),
+ 'eq',
+ '<iframe width="560" height="315"></iframe>',
+ 'two constraints fail',
+);
+
+$hr = HTML::Restrict->new(
+ rules => {
+ iframe => [
+ { src => qr{^http://www\.youtube\.com} },
+ { frameborder => qr{^(0|1)$} },
+ { height => qr{^315$} },
+ { width => qr{^560$} },
+ ],
+ },
+);
+
+cmp_ok(
+ $hr->process(
+ '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+ ),
+ 'eq',
+ '<iframe src="http://www.youtube.com/embed/9gKeRZM2Iyc" frameborder="0" height="315" width="560"></iframe>',
+ 'possible to maintain order',
+);
+
+done_testing;
diff --git a/t/author-pod-coverage.t b/t/author-pod-coverage.t
new file mode 100644
index 0000000..243340f
--- /dev/null
+++ b/t/author-pod-coverage.t
@@ -0,0 +1,15 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests.
+
+use Test::Pod::Coverage 1.08;
+use Pod::Coverage::TrustPod;
+
+all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' });
diff --git a/t/author-pod-spell.t b/t/author-pod-spell.t
new file mode 100644
index 0000000..8b32a99
--- /dev/null
+++ b/t/author-pod-spell.t
@@ -0,0 +1,74 @@
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+use strict;
+use warnings;
+use Test::More;
+
+# generated by Dist::Zilla::Plugin::Test::PodSpelling 2.007004
+use Test::Spelling 0.12;
+use Pod::Wordlist;
+
+
+add_stopwords(<DATA>);
+all_pod_files_spelling_ok( qw( bin lib ) );
+__DATA__
+Alders
+Alders'
+Arthur
+Axel
+Ben
+Bullock
+Carwyn
+DOCTYPE
+Dagfinn
+David
+Elliott
+Etheridge
+Fitz
+Forsyth
+Golden
+Graham
+HTML
+Ilmari
+Jubenville
+Karen
+Knop
+Mannsåker
+Mark
+Olaf
+Raybec
+Restrict
+Schmidt
+TerMarsch
+XSS
+benkasminbullock
+bolded
+calyx238
+dagolden
+ether
+fREW
+fitz
+frioux
+graham
+haarg
+href
+img
+ioncache
+lib
+mark
+olaf
+param
+params
+perlpong
+sam
+schemas
+skaufman
+src
+whitelisted
+whitelisting
diff --git a/t/author-synopsis.t b/t/author-synopsis.t
new file mode 100644
index 0000000..5d1d4a7
--- /dev/null
+++ b/t/author-synopsis.t
@@ -0,0 +1,13 @@
+#!perl
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+
+use Test::Synopsis;
+
+all_synopsis_ok();
diff --git a/t/author-tidyall.t b/t/author-tidyall.t
new file mode 100644
index 0000000..f161a3e
--- /dev/null
+++ b/t/author-tidyall.t
@@ -0,0 +1,19 @@
+
+BEGIN {
+ unless ($ENV{AUTHOR_TESTING}) {
+ print qq{1..0 # SKIP these tests are for testing by the author\n};
+ exit
+ }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::Test::TidyAll v$VERSION
+
+use Test::More 0.88;
+use Test::Code::TidyAll 0.24;
+
+tidyall_ok(
+ verbose => ( exists $ENV{TEST_TIDYALL_VERBOSE} ? $ENV{TEST_TIDYALL_VERBOSE} : 0 ),
+ jobs => ( exists $ENV{TEST_TIDYALL_JOBS} ? $ENV{TEST_TIDYALL_JOBS} : 1 ),
+);
+
+done_testing;
diff --git a/t/comments.t b/t/comments.t
new file mode 100644
index 0000000..2ceccbb
--- /dev/null
+++ b/t/comments.t
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!-- comment here -->stuff';
+$hr->debug(0);
+
+is $hr->process($text), 'stuff', 'comments allowed';
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$text = 'before<!-- This is a comment -- -- So is this -->after';
+$hr->allow_comments(0);
+
+is $hr->process($text), 'beforeafter', 'comment allowed';
+
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$hr->allow_comments(0);
+$text = '<!-- <script> <h1> -->';
+is $hr->process($text), undef, 'tags nested in comments removed';
+
+#$hr->set_rules({ script => [], 'h1' => [] });
+#is $hr->process( $text ), $text, 'tags nested in comments not removed when explicitly allowed';
+
+done_testing();
diff --git a/t/declaration.t b/t/declaration.t
new file mode 100644
index 0000000..2d5e9bf
--- /dev/null
+++ b/t/declaration.t
@@ -0,0 +1,22 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!DOCTYPE HTML> ';
+$hr->debug(1);
+
+is $hr->process($text), '', 'declaration not preserved';
+$hr->allow_declaration(1);
+is $hr->process($text), '<!DOCTYPE HTML>', 'declaration is preserved';
+
+$text
+ = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
+is $hr->process($text), $text, 'declaration preserved';
+
+done_testing();
diff --git a/t/js.t b/t/js.t
new file mode 100644
index 0000000..6d25754
--- /dev/null
+++ b/t/js.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<script type="text/javascript">
+$(document).ready(function() {
+ $('a.gallery').fancybox();
+});
+</script>];
+
+is( $hr->process($html), undef, "content of script tags removed by default" );
+
+$hr->set_rules( { script => ['type'] } );
+
+is( $hr->process($html), $html, "content of script preserved" );
+
+done_testing();
diff --git a/t/lowercase.t b/t/lowercase.t
new file mode 100644
index 0000000..7284884
--- /dev/null
+++ b/t/lowercase.t
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Fatal;
+use HTML::Restrict;
+
+my $html
+ = q[<!doctype html><!-- comments go here --><body onLoad="stuff">foo</body>];
+
+like(
+ exception {
+ my $hr = HTML::Restrict->new( rules => { Body => ['onload'] } );
+ },
+ qr{tag names must be lower cased},
+ "dies on mixed case tag names",
+);
+
+like(
+ exception {
+ my $hr = HTML::Restrict->new( rules => { body => ['onLoad'] } );
+ },
+ qr{attribute names must be lower cased},
+ "dies on mixed case attributes",
+);
+
+done_testing();
diff --git a/t/memory-leak.t b/t/memory-leak.t
new file mode 100644
index 0000000..7111746
--- /dev/null
+++ b/t/memory-leak.t
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+use Scalar::Util qw(weaken);
+
+# Ensure that we don't have any circular references between the HTML::Restrict
+# object and its parser.
+my $hr = HTML::Restrict->new;
+my $p = $hr->parser;
+
+my $weak_hr = $hr;
+my $weak_p = $p;
+weaken($weak_hr);
+weaken($weak_p);
+
+undef $hr;
+undef $p;
+
+ok !defined $weak_hr, 'HTML::Restrict freed; no circular reference.';
+ok !defined $weak_p, 'HTML::Parser freed; no circular reference.';
+
+done_testing();
diff --git a/t/perlcriticrc b/t/perlcriticrc
new file mode 100644
index 0000000..d6574dd
--- /dev/null
+++ b/t/perlcriticrc
@@ -0,0 +1,4 @@
+severity = 3
+[-Subroutines::RequireArgUnpacking]
+[-ControlStructures::ProhibitPostfixControls]
+[-Miscellanea::RequireRcsKeywords]
diff --git a/t/pod.t b/t/pod.t
new file mode 100644
index 0000000..ee8b18a
--- /dev/null
+++ b/t/pod.t
@@ -0,0 +1,12 @@
+#!perl -T
+
+use strict;
+use warnings;
+use Test::More;
+
+# Ensure a recent version of Test::Pod
+my $min_tp = 1.22;
+eval "use Test::Pod $min_tp";
+plan skip_all => "Test::Pod $min_tp required for testing POD" if $@;
+
+all_pod_files_ok();
diff --git a/t/release-cpan-changes.t b/t/release-cpan-changes.t
new file mode 100644
index 0000000..08331d3
--- /dev/null
+++ b/t/release-cpan-changes.t
@@ -0,0 +1,18 @@
+
+BEGIN {
+ unless ($ENV{RELEASE_TESTING}) {
+ print qq{1..0 # SKIP these tests are for release candidate testing\n};
+ exit
+ }
+}
+
+use strict;
+use warnings;
+
+# this test was generated with Dist::Zilla::Plugin::Test::CPAN::Changes 0.012
+
+use Test::More 0.96 tests => 1;
+use Test::CPAN::Changes;
+subtest 'changes_ok' => sub {
+ changes_file_ok('Changes');
+};
diff --git a/t/replace_img.t b/t/replace_img.t
new file mode 100644
index 0000000..f40aa90
--- /dev/null
+++ b/t/replace_img.t
@@ -0,0 +1,62 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my @texts = (
+ {
+ label => "<img ... />",
+ html => q{<img alt="foo bar" src="http://example.com/foo.jpg" />},
+ },
+ {
+ label => "<img ... ></img>",
+ html => q{<img alt="foo bar" src="http://example.com/foo.jpg"></img>},
+ },
+);
+
+my @cases = (
+ {
+ label => "default args",
+ args => {},
+ expect => undef,
+ },
+ {
+ label => "replace_img => 0",
+ args => { replace_img => 0 },
+ expect => undef,
+ },
+ {
+ label => "replace_img => 1",
+ args => { replace_img => 1 },
+ expect => q{[IMAGE: foo bar]},
+ },
+ {
+ label => "replace_img => CODE",
+ args => { replace_img => \&replacer },
+ expect => q{[IMAGE REMOVED: foo bar]},
+ },
+);
+
+sub replacer {
+ my ( $tag, $attr, $text ) = @_;
+ return "[IMAGE REMOVED: $attr->{alt}]";
+}
+
+for my $c (@cases) {
+ ok(
+ my $hr = HTML::Restrict->new( debug => 0, %{ $c->{args} } ),
+ "$c->{label}: HTML::Restrict->new(...)"
+ );
+ for my $t (@texts) {
+ is(
+ $hr->process( $t->{html} ), $c->{expect},
+ "$c->{label}: $t->{label}"
+ );
+ }
+}
+
+done_testing();
diff --git a/t/scheme.t b/t/scheme.t
new file mode 100644
index 0000000..8bebd90
--- /dev/null
+++ b/t/scheme.t
@@ -0,0 +1,70 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+ rules => {
+ a => [qw( href )],
+ img => [qw( src /)],
+ blockquote => [qw( cite )],
+ },
+);
+
+$hr->set_uri_schemes( [ 'http', 'https', undef, 'ftp' ] );
+
+cmp_ok(
+ $hr->process('<a href="http://example.com">link</a>'),
+ 'eq', '<a href="http://example.com">link</a>',
+ 'http scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="https://example.com">link</a>'),
+ 'eq', '<a href="https://example.com">link</a>',
+ 'https scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="/some/file">link</a>'),
+ 'eq', '<a href="/some/file">link</a>',
+ 'relative scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="ftp://example.com">link</a>'),
+ 'eq', '<a href="ftp://example.com">link</a>',
+ 'ftp scheme preserved',
+);
+
+cmp_ok(
+ $hr->process('<a href="file://example.com">link</a>'),
+ 'eq', '<a>link</a>',
+ 'file scheme removed',
+);
+
+cmp_ok(
+ $hr->process('<img src="javascript:evil_fc()" />'),
+ 'eq', '<img />',
+ 'img src with javascript removed',
+);
+
+cmp_ok(
+ $hr->process(
+ '<blockquote cite="javascript:evil_fc()">blockquote</blockquote>'),
+ 'eq',
+ '<blockquote>blockquote</blockquote>',
+ 'blockquote cite with javascript removed',
+);
+
+# disable relative schemes
+$hr->set_uri_schemes( [ 'http', 'https', 'ftp' ] );
+
+cmp_ok(
+ $hr->process('<a href="/some/file">link</a>'),
+ 'eq', '<a>link</a>',
+ 'relative scheme removed',
+);
+
+done_testing();
diff --git a/t/stack.t b/t/stack.t
new file mode 100644
index 0000000..b95f0ff
--- /dev/null
+++ b/t/stack.t
@@ -0,0 +1,19 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+
+ok( !@{ $hr->_stripper_stack }, "stack empty" );
+
+push @{ $hr->_stripper_stack }, 'script', 'style', 'pre', 'script';
+$hr->_delete_tag_from_stack('script');
+is_deeply(
+ $hr->_stripper_stack,
+ [ 'script', 'style', 'pre' ],
+ 'deletes from stack in correct order and amount'
+);
+
+done_testing();
diff --git a/t/style.t b/t/style.t
new file mode 100644
index 0000000..2491fa5
--- /dev/null
+++ b/t/style.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<style type="text/css">
+hr {color:sienna;}
+p {margin-left:20px;}
+body {background-image:url("images/back40.gif");}
+</style>];
+
+is( $hr->process($html), undef, "content of style tag removed by default" );
+
+$hr->set_rules( { style => ['type'] } );
+
+is( $hr->process($html), $html, "content of style tag preserved" );
+
+done_testing();
diff --git a/t/xss.t b/t/xss.t
new file mode 100755
index 0000000..0f98789
--- /dev/null
+++ b/t/xss.t
@@ -0,0 +1,35 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+$hr->debug(0);
+$hr->set_rules( { a => [ 'href', 'class' ] } );
+
+my $text = '<a href="javascript:alert(1)">oops!</a>';
+
+my $clean = $hr->process($text);
+is $clean, '<a>oops!</a>', "bad scheme removed";
+
+is $hr->process('<a href="javascript&#58;evil_script()">evil</a>'),
+ '<a>evil</a>', 'bad scheme removed';
+
+foreach my $uri (
+ 'http://vilerichard.com', 'https://vilerichard.com',
+ '//vilerichard.com', '/music'
+ ) {
+ my $img = qq[<a href="$uri">click</a>];
+ is $hr->process($img), $img, "good uri scheme preserved";
+}
+
+is $hr->process(
+ '<a class="&quot;&gt;&lt;script&gt;alert(&quot;oops&quot;);&lt;/script&gt;&lt;a href=&quot;"></a>'
+ ),
+ '<a class="&quot;&gt;&lt;script&gt;alert(&quot;oops&quot;);&lt;/script&gt;&lt;a href=&quot;"></a>',
+ 'attribute value filtered';
+
+done_testing();
diff --git a/tidyall.ini b/tidyall.ini
new file mode 100644
index 0000000..6b3438d
--- /dev/null
+++ b/tidyall.ini
@@ -0,0 +1,26 @@
+[PerlTidy]
+select = **/*.{pl,pm,t,psgi}
+ignore = t/00-*
+ignore = t/author-*
+ignore = t/release-*
+ignore = blib/**/*
+ignore = .build/**/*
+ignore = HTML-Restrict-*/**/*
+argv = --profile=$ROOT/perltidyrc
+
+;[PerlCritic]
+;select = **/*.{pl,pm,t,psgi}
+;ignore = t/00-*
+;ignore = t/author-*
+;ignore = t/release-*
+;ignore = blib/**/*
+;ignore = .build/**/*
+;ignore = HTML-Restrict-*/**/*
+;argv = --profile $ROOT/perlcriticrc --program-extensions .pl --program-extensions .t --program-extensions .psgi
+
+[SortLines::Naturally]
+select = .gitignore
+select = .stopwords
+
+[JSON]
+select = t/**/*json