diff options
author | Jelmer Vernooij <jelmer@debian.org> | 2017-07-02 14:19:05 +0000 |
---|---|---|
committer | Jelmer Vernooij <jelmer@debian.org> | 2017-07-02 14:19:05 +0000 |
commit | 0b25629644387c5e619e923a96f7031a222a3473 (patch) | |
tree | 081bed898da98364c57f401078abbc3bfe22929b | |
parent | 9e9a6c02ccaee0e13ca6792c442766c1e5fe4ac4 (diff) |
New upstream version 0.5.0
-rw-r--r-- | CHANGES.txt | 17 | ||||
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | PKG-INFO | 32 | ||||
-rw-r--r-- | README.html | 34 | ||||
-rw-r--r-- | README.txt | 2 | ||||
-rw-r--r-- | defusedxml.egg-info/PKG-INFO | 873 | ||||
-rw-r--r-- | defusedxml.egg-info/SOURCES.txt | 44 | ||||
-rw-r--r-- | defusedxml.egg-info/dependency_links.txt | 1 | ||||
-rw-r--r-- | defusedxml.egg-info/top_level.txt | 1 | ||||
-rw-r--r-- | defusedxml/ElementTree.py | 48 | ||||
-rw-r--r-- | defusedxml/__init__.py | 5 | ||||
-rw-r--r-- | defusedxml/cElementTree.py | 9 | ||||
-rw-r--r-- | defusedxml/common.py | 69 | ||||
-rw-r--r-- | defusedxml/expatbuilder.py | 2 | ||||
-rw-r--r-- | defusedxml/lxml.py | 14 | ||||
-rw-r--r-- | defusedxml/minidom.py | 12 | ||||
-rw-r--r-- | defusedxml/sax.py | 2 | ||||
-rw-r--r-- | defusedxml/xmlrpc.py | 33 | ||||
-rw-r--r-- | setup.cfg | 12 | ||||
-rw-r--r-- | setup.py | 21 | ||||
-rw-r--r-- | tests.py | 103 |
21 files changed, 1129 insertions, 215 deletions
diff --git a/CHANGES.txt b/CHANGES.txt index 214c562..b262ed9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,23 @@ Changelog ========= +defusedxml 0.5.0 +---------------- + +*Release date: 07-Feb-2017* + +- No changes + +defusedxml 0.5.0.rc1 +-------------------- + +*Release date: 28-Jan-2017* + +- Add compatibility with Python 3.6 +- Drop support for Python 2.6, 3.1, 3.2, 3.3 +- Fix lxml tests (XMLSyntaxError: Detected an entity reference loop) + + defusedxml 0.4.1 ---------------- @@ -7,7 +7,11 @@ PYTHONS=python2.6 python2.7 python3.1 python3.2 python3.3 python3.4 .PHONY: inplace all rebuild test_inplace test fulltests clean distclean .PHONY: sdist install -all: inplace README.html +all: inplace README.html README.md + +README.md: README.txt CHANGES.txt + pandoc --from=rst --to=markdown README.txt > $@ + pandoc --from=rst --to=markdown CHANGES.txt >> $@ README.html: README.txt CHANGES.txt void.css @echo | cat README.txt - CHANGES.txt | \ @@ -54,8 +58,8 @@ whitespace: xargs sed -i 's/[ \t]*$$//' -sdist: README.html - $(PYTHON) setup.py sdist --formats gztar,zip +packages: README.html README.md + $(PYTHON) setup.py packages install: $(PYTHON) setup.py $(SETUPFLAGS) build $(COMPILEFLAGS) @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: defusedxml -Version: 0.4.1 +Version: 0.5.0 Summary: XML bomb protection for Python stdlib modules -Home-page: https://bitbucket.org/tiran/defusedxml +Home-page: https://github.com/tiran/defusedxml Author: Christian Heimes Author-email: christian@python.org License: PSFL -Download-URL: http://pypi.python.org/pypi/defusedxml +Download-URL: https://pypi.python.org/pypi/defusedxml Description: =================================================== defusedxml -- defusing XML bombs and other exploits =================================================== @@ -721,7 +721,7 @@ Description: =================================================== License ======= - Copyright (c) 2013 by Christian Heimes <christian@python.org> + Copyright (c) 2013-2017 
by Christian Heimes <christian@python.org> Licensed to PSF under a Contributor Agreement. @@ -787,6 +787,23 @@ Description: =================================================== Changelog ========= + defusedxml 0.5.0 + ---------------- + + *Release date: 07-Feb-2017* + + - No changes + + defusedxml 0.5.0.rc1 + -------------------- + + *Release date: 28-Jan-2017* + + - Add compatibility with Python 3.6 + - Drop support for Python 2.6, 3.1, 3.2, 3.3 + - Fix lxml tests (XMLSyntaxError: Detected an entity reference loop) + + defusedxml 0.4.1 ---------------- @@ -848,10 +865,9 @@ Classifier: License :: OSI Approved :: Python Software Foundation License Classifier: Natural Language :: English Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Programming Language :: Python :: 3.2 -Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 Classifier: Topic :: Text Processing :: Markup :: XML diff --git a/README.html b/README.html index b35c231..41286ec 100644 --- a/README.html +++ b/README.html @@ -3,7 +3,7 @@ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> -<meta name="generator" content="Docutils 0.8.1: http://docutils.sourceforge.net/" /> +<meta name="generator" content="Docutils 0.12: http://docutils.sourceforge.net/" /> <title>defusedxml -- defusing XML bombs and other exploits</title> <style type="text/css"> @@ -553,11 +553,12 @@ by default.</p> <li><a class="reference internal" href="#acknowledgements" id="id45">Acknowledgements</a></li> <li><a class="reference internal" 
href="#references" id="id46">References</a></li> <li><a class="reference internal" href="#changelog" id="id47">Changelog</a><ul> -<li><a class="reference internal" href="#defusedxml-0-4-1" id="id48">defusedxml 0.4.1</a></li> -<li><a class="reference internal" href="#defusedxml-0-4" id="id49">defusedxml 0.4</a></li> -<li><a class="reference internal" href="#defusedxml-0-3" id="id50">defusedxml 0.3</a></li> -<li><a class="reference internal" href="#defusedxml-0-2" id="id51">defusedxml 0.2</a></li> -<li><a class="reference internal" href="#defusedxml-0-1" id="id52">defusedxml 0.1</a></li> +<li><a class="reference internal" href="#defusedxml-0-5-0-rc1" id="id48">defusedxml 0.5.0.rc1</a></li> +<li><a class="reference internal" href="#defusedxml-0-4-1" id="id49">defusedxml 0.4.1</a></li> +<li><a class="reference internal" href="#defusedxml-0-4" id="id50">defusedxml 0.4</a></li> +<li><a class="reference internal" href="#defusedxml-0-3" id="id51">defusedxml 0.3</a></li> +<li><a class="reference internal" href="#defusedxml-0-2" id="id52">defusedxml 0.2</a></li> +<li><a class="reference internal" href="#defusedxml-0-1" id="id53">defusedxml 0.1</a></li> </ul> </li> </ul> @@ -1254,7 +1255,7 @@ builderFactory.setFeature("http://apache.org/xml/features/nonvalidating/loa </div> <div class="section" id="license"> <h1><a class="toc-backref" href="#id44">License</a></h1> -<p>Copyright (c) 2013 by Christian Heimes <<a class="reference external" href="mailto:christian@python.org">christian@python.org</a>></p> +<p>Copyright (c) 2013-2017 by Christian Heimes <<a class="reference external" href="mailto:christian@python.org">christian@python.org</a>></p> <p>Licensed to PSF under a Contributor Agreement.</p> <p>See <a class="reference external" href="http://www.python.org/psf/license">http://www.python.org/psf/license</a> for licensing details.</p> </div> @@ -1293,8 +1294,17 @@ during working hours as part of semantics's open source initiative.</dd> </div> <div class="section" 
id="changelog"> <h1><a class="toc-backref" href="#id47">Changelog</a></h1> +<div class="section" id="defusedxml-0-5-0-rc1"> +<h2><a class="toc-backref" href="#id48">defusedxml 0.5.0.rc1</a></h2> +<p><em>Release date: 28-Jan-2017</em></p> +<ul class="simple"> +<li>Add compatibility with Python 3.6</li> +<li>Drop support for Python 2.6, 3.1, 3.2, 3.3</li> +<li>Fix lxml tests (XMLSyntaxError: Detected an entity reference loop)</li> +</ul> +</div> <div class="section" id="defusedxml-0-4-1"> -<h2><a class="toc-backref" href="#id48">defusedxml 0.4.1</a></h2> +<h2><a class="toc-backref" href="#id49">defusedxml 0.4.1</a></h2> <p><em>Release date: 28-Mar-2013</em></p> <ul class="simple"> <li>Add more demo exploits, e.g. python_external.py and Xalan XSLT demos.</li> @@ -1302,7 +1312,7 @@ during working hours as part of semantics's open source initiative.</dd> </ul> </div> <div class="section" id="defusedxml-0-4"> -<h2><a class="toc-backref" href="#id49">defusedxml 0.4</a></h2> +<h2><a class="toc-backref" href="#id50">defusedxml 0.4</a></h2> <p><em>Release date: 25-Feb-2013</em></p> <ul class="simple"> <li>As per <a class="reference external" href="http://seclists.org/oss-sec/2013/q1/340">http://seclists.org/oss-sec/2013/q1/340</a> please REJECT @@ -1315,14 +1325,14 @@ and WebDAV.</li> </ul> </div> <div class="section" id="defusedxml-0-3"> -<h2><a class="toc-backref" href="#id50">defusedxml 0.3</a></h2> +<h2><a class="toc-backref" href="#id51">defusedxml 0.3</a></h2> <p><em>Release date: 19-Feb-2013</em></p> <ul class="simple"> <li>Improve documentation</li> </ul> </div> <div class="section" id="defusedxml-0-2"> -<h2><a class="toc-backref" href="#id51">defusedxml 0.2</a></h2> +<h2><a class="toc-backref" href="#id52">defusedxml 0.2</a></h2> <p><em>Release date: 15-Feb-2013</em></p> <ul class="simple"> <li>Rename ExternalEntitiesForbidden to ExternalReferenceForbidden</li> @@ -1337,7 +1347,7 @@ and WebDAV.</li> </ul> </div> <div class="section" id="defusedxml-0-1"> -<h2><a 
class="toc-backref" href="#id52">defusedxml 0.1</a></h2> +<h2><a class="toc-backref" href="#id53">defusedxml 0.1</a></h2> <p><em>Release date: 08-Feb-2013</em></p> <ul class="simple"> <li>Initial and internal release for PSRT review</li> @@ -712,7 +712,7 @@ TODO License ======= -Copyright (c) 2013 by Christian Heimes <christian@python.org> +Copyright (c) 2013-2017 by Christian Heimes <christian@python.org> Licensed to PSF under a Contributor Agreement. diff --git a/defusedxml.egg-info/PKG-INFO b/defusedxml.egg-info/PKG-INFO new file mode 100644 index 0000000..a84d5a7 --- /dev/null +++ b/defusedxml.egg-info/PKG-INFO @@ -0,0 +1,873 @@ +Metadata-Version: 1.1 +Name: defusedxml +Version: 0.5.0 +Summary: XML bomb protection for Python stdlib modules +Home-page: https://github.com/tiran/defusedxml +Author: Christian Heimes +Author-email: christian@python.org +License: PSFL +Download-URL: https://pypi.python.org/pypi/defusedxml +Description: =================================================== + defusedxml -- defusing XML bombs and other exploits + =================================================== + + "It's just XML, what could probably go wrong?" + + Christian Heimes <christian@python.org> + + Synopsis + ======== + + The results of an attack on a vulnerable XML library can be fairly dramatic. + With just a few hundred **Bytes** of XML data an attacker can occupy several + **Gigabytes** of memory within **seconds**. An attacker can also keep + CPUs busy for a long time with a small to medium size request. Under some + circumstances it is even possible to access local files on your + server, to circumvent a firewall, or to abuse services to rebound attacks to + third parties. + + The attacks use and abuse less common features of XML and its parsers. The + majority of developers are unacquainted with features such as processing + instructions and entity expansions that XML inherited from SGML. 
At best + they know about ``<!DOCTYPE>`` from experience with HTML but they are not + aware that a document type definition (DTD) can generate an HTTP request + or load a file from the file system. + + None of the issues is new. They have been known for a long time. Billion + laughs was first reported in 2003. Nevertheless some XML libraries and + applications are still vulnerable and even heavy users of XML are + surprised by these features. It's hard to say whom to blame for the + situation. It's too short sighted to shift all blame on XML parsers and + XML libraries for using insecure default settings. After all they + properly implement XML specifications. Application developers must not rely + that a library is always configured for security and potential harmful data + by default. + + + .. contents:: Table of Contents + :depth: 2 + + + Attack vectors + ============== + + billion laughs / exponential entity expansion + --------------------------------------------- + + The `Billion Laughs`_ attack -- also known as exponential entity expansion -- + uses multiple levels of nested entities. The original example uses 9 levels + of 10 expansions in each level to expand the string ``lol`` to a string of + 3 * 10 :sup:`9` bytes, hence the name "billion laughs". The resulting string + occupies 3 GB (2.79 GiB) of memory; intermediate strings require additional + memory. Because most parsers don't cache the intermediate step for every + expansion it is repeated over and over again. It increases the CPU load even + more. + + An XML document of just a few hundred bytes can disrupt all services on a + machine within seconds. 
+ + Example XML:: + + <!DOCTYPE xmlbomb [ + <!ENTITY a "1234567890" > + <!ENTITY b "&a;&a;&a;&a;&a;&a;&a;&a;"> + <!ENTITY c "&b;&b;&b;&b;&b;&b;&b;&b;"> + <!ENTITY d "&c;&c;&c;&c;&c;&c;&c;&c;"> + ]> + <bomb>&d;</bomb> + + + quadratic blowup entity expansion + --------------------------------- + + A quadratic blowup attack is similar to a `Billion Laughs`_ attack; it abuses + entity expansion, too. Instead of nested entities it repeats one large entity + with a couple of thousand chars over and over again. The attack isn't as + efficient as the exponential case but it avoids triggering countermeasures of + parsers against heavily nested entities. Some parsers limit the depth and + breadth of a single entity but not the total amount of expanded text + throughout an entire XML document. + + A medium-sized XML document with a couple of hundred kilobytes can require a + couple of hundred MB to several GB of memory. When the attack is combined + with some level of nested expansion an attacker is able to achieve a higher + ratio of success. + + :: + + <!DOCTYPE bomb [ + <!ENTITY a "xxxxxxx... a couple of ten thousand chars"> + ]> + <bomb>&a;&a;&a;... repeat</bomb> + + + external entity expansion (remote) + ---------------------------------- + + Entity declarations can contain more than just text for replacement. They can + also point to external resources by public identifiers or system identifiers. + System identifiers are standard URIs. When the URI is a URL (e.g. a + ``http://`` locator) some parsers download the resource from the remote + location and embed them into the XML document verbatim. + + Simple example of a parsed external entity:: + + <!DOCTYPE external [ + <!ENTITY ee SYSTEM "http://www.python.org/some.xml"> + ]> + <root>&ee;</root> + + The case of parsed external entities works only for valid XML content. The + XML standard also supports unparsed external entities with a + ``NData declaration``. 
+ + External entity expansion opens the door to plenty of exploits. An attacker + can abuse a vulnerable XML library and application to rebound and forward + network requests with the IP address of the server. It highly depends + on the parser and the application what kind of exploit is possible. For + example: + + * An attacker can circumvent firewalls and gain access to restricted + resources as all the requests are made from an internal and trustworthy + IP address, not from the outside. + * An attacker can abuse a service to attack, spy on or DoS your servers but + also third party services. The attack is disguised with the IP address of + the server and the attacker is able to utilize the high bandwidth of a big + machine. + * An attacker can exhaust additional resources on the machine, e.g. with + requests to a service that doesn't respond or responds with very large + files. + * An attacker may gain knowledge, when, how often and from which IP address + a XML document is accessed. + * An attacker could send mail from inside your network if the URL handler + supports ``smtp://`` URIs. + + + external entity expansion (local file) + -------------------------------------- + + External entities with references to local files are a sub-case of external + entity expansion. It's listed as an extra attack because it deserves extra + attention. Some XML libraries such as lxml disable network access by default + but still allow entity expansion with local file access by default. Local + files are either referenced with a ``file://`` URL or by a file path (either + relative or absolute). + + An attacker may be able to access and download all files that can be read by + the application process. This may include critical configuration files, too. + + :: + + <!DOCTYPE external [ + <!ENTITY ee SYSTEM "file:///PATH/TO/simple.xml"> + ]> + <root>&ee;</root> + + + DTD retrieval + ------------- + + This case is similar to external entity expansion, too. 
Some XML libraries + like Python's xml.dom.pulldom retrieve document type definitions from remote + or local locations. Several attack scenarios from the external entity case + apply to this issue as well. + + :: + + <?xml version="1.0" encoding="utf-8"?> + <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + <html> + <head/> + <body>text</body> + </html> + + + Python XML Libraries + ==================== + + .. csv-table:: vulnerabilities and features + :header: "kind", "sax", "etree", "minidom", "pulldom", "xmlrpc", "lxml", "genshi" + :widths: 24, 7, 8, 8, 7, 8, 8, 8 + :stub-columns: 0 + + "billion laughs", "**True**", "**True**", "**True**", "**True**", "**True**", "False (1)", "False (5)" + "quadratic blowup", "**True**", "**True**", "**True**", "**True**", "**True**", "**True**", "False (5)" + "external entity expansion (remote)", "**True**", "False (3)", "False (4)", "**True**", "false", "False (1)", "False (5)" + "external entity expansion (local file)", "**True**", "False (3)", "False (4)", "**True**", "false", "**True**", "False (5)" + "DTD retrieval", "**True**", "False", "False", "**True**", "false", "False (1)", "False" + "gzip bomb", "False", "False", "False", "False", "**True**", "**partly** (2)", "False" + "xpath support (7)", "False", "False", "False", "False", "False", "**True**", "False" + "xsl(t) support (7)", "False", "False", "False", "False", "False", "**True**", "False" + "xinclude support (7)", "False", "**True** (6)", "False", "False", "False", "**True** (6)", "**True**" + "C library", "expat", "expat", "expat", "expat", "expat", "libxml2", "expat" + + 1. Lxml is protected against billion laughs attacks and doesn't do network + lookups by default. + 2. libxml2 and lxml are not directly vulnerable to gzip decompression bombs + but they don't protect you against them either. + 3. xml.etree doesn't expand entities and raises a ParserError when an entity + occurs. + 4. 
minidom doesn't expand entities and simply returns the unexpanded entity + verbatim. + 5. genshi.input of genshi 0.6 doesn't support entity expansion and raises a + ParserError when an entity occurs. + 6. Library has (limited) XInclude support but requires an additional step to + process inclusion. + 7. These are features but they may introduce exploitable holes, see + `Other things to consider`_ + + + Settings in standard library + ---------------------------- + + + xml.sax.handler Features + ........................ + + feature_external_ges (http://xml.org/sax/features/external-general-entities) + disables external entity expansion + + feature_external_pes (http://xml.org/sax/features/external-parameter-entities) + the option is ignored and doesn't modify any functionality + + DOM xml.dom.xmlbuilder.Options + .............................. + + external_parameter_entities + ignored + + external_general_entities + ignored + + external_dtd_subset + ignored + + entities + unsure + + + defusedxml + ========== + + The `defusedxml package`_ (`defusedxml on PyPI`_) + contains several Python-only workarounds and fixes + for denial of service and other vulnerabilities in Python's XML libraries. + In order to benefit from the protection you just have to import and use the + listed functions / classes from the right defusedxml module instead of the + original module. Merely `defusedxml.xmlrpc`_ is implemented as monkey patch. + + Instead of:: + + >>> from xml.etree.ElementTree import parse + >>> et = parse(xmlfile) + + alter code to:: + + >>> from defusedxml.ElementTree import parse + >>> et = parse(xmlfile) + + Additionally the package has an **untested** function to monkey patch + all stdlib modules with ``defusedxml.defuse_stdlib()``. + + All functions and parser classes accept three additional keyword arguments. + They return either the same objects as the original functions or compatible + subclasses. 
+ + forbid_dtd (default: False) + disallow XML with a ``<!DOCTYPE>`` processing instruction and raise a + *DTDForbidden* exception when a DTD processing instruction is found. + + forbid_entities (default: True) + disallow XML with ``<!ENTITY>`` declarations inside the DTD and raise an + *EntitiesForbidden* exception when an entity is declared. + + forbid_external (default: True) + disallow any access to remote or local resources in external entities + or DTD and raising an *ExternalReferenceForbidden* exception when a DTD + or entity references an external resource. + + + defusedxml (package) + -------------------- + + DefusedXmlException, DTDForbidden, EntitiesForbidden, + ExternalReferenceForbidden, NotSupportedError + + defuse_stdlib() (*experimental*) + + + defusedxml.cElementTree + ----------------------- + + parse(), iterparse(), fromstring(), XMLParser + + + defusedxml.ElementTree + ----------------------- + + parse(), iterparse(), fromstring(), XMLParser + + + defusedxml.expatreader + ---------------------- + + create_parser(), DefusedExpatParser + + + defusedxml.sax + -------------- + + parse(), parseString(), create_parser() + + + defusedxml.expatbuilder + ----------------------- + + parse(), parseString(), DefusedExpatBuilder, DefusedExpatBuilderNS + + + defusedxml.minidom + ------------------ + + parse(), parseString() + + + defusedxml.pulldom + ------------------ + + parse(), parseString() + + + defusedxml.xmlrpc + ----------------- + + The fix is implemented as monkey patch for the stdlib's xmlrpc package (3.x) + or xmlrpclib module (2.x). The function `monkey_patch()` enables the fixes, + `unmonkey_patch()` removes the patch and puts the code in its former state. + + The monkey patch protects against XML related attacks as well as + decompression bombs and excessively large requests or responses. The default + setting is 30 MB for requests, responses and gzip decompression. You can + modify the default by changing the module variable `MAX_DATA`. 
A value of + `-1` disables the limit. + + + defusedxml.lxml + --------------- + + The module acts as an *example* how you could protect code that uses + lxml.etree. It implements a custom Element class that filters out + Entity instances, a custom parser factory and a thread local storage for + parser instances. It also has a check_docinfo() function which inspects + a tree for internal or external DTDs and entity declarations. In order to + check for entities lxml > 3.0 is required. + + parse(), fromstring() + RestrictedElement, GlobalParserTLS, getDefaultParser(), check_docinfo() + + + defusedexpat + ============ + + The `defusedexpat package`_ (`defusedexpat on PyPI`_) + comes with binary extensions and a + `modified expat`_ libary instead of the standard `expat parser`_. It's + basically a stand-alone version of the patches for Python's standard + library C extensions. + + Modifications in expat + ---------------------- + + new definitions:: + + XML_BOMB_PROTECTION + XML_DEFAULT_MAX_ENTITY_INDIRECTIONS + XML_DEFAULT_MAX_ENTITY_EXPANSIONS + XML_DEFAULT_RESET_DTD + + new XML_FeatureEnum members:: + + XML_FEATURE_MAX_ENTITY_INDIRECTIONS + XML_FEATURE_MAX_ENTITY_EXPANSIONS + XML_FEATURE_IGNORE_DTD + + new XML_Error members:: + + XML_ERROR_ENTITY_INDIRECTIONS + XML_ERROR_ENTITY_EXPANSION + + new API functions:: + + int XML_GetFeature(XML_Parser parser, + enum XML_FeatureEnum feature, + long *value); + int XML_SetFeature(XML_Parser parser, + enum XML_FeatureEnum feature, + long value); + int XML_GetFeatureDefault(enum XML_FeatureEnum feature, + long *value); + int XML_SetFeatureDefault(enum XML_FeatureEnum feature, + long value); + + XML_FEATURE_MAX_ENTITY_INDIRECTIONS + Limit the amount of indirections that are allowed to occur during the + expansion of a nested entity. A counter starts when an entity reference + is encountered. It resets after the entity is fully expanded. 
The limit + protects the parser against exponential entity expansion attacks (aka + billion laughs attack). When the limit is exceeded the parser stops and + fails with `XML_ERROR_ENTITY_INDIRECTIONS`. + A value of 0 disables the protection. + + Supported range + 0 .. UINT_MAX + Default + 40 + + XML_FEATURE_MAX_ENTITY_EXPANSIONS + Limit the total length of all entity expansions throughout the entire + document. The lengths of all entities are accumulated in a parser variable. + The setting protects against quadratic blowup attacks (lots of expansions + of a large entity declaration). When the sum of all entities exceeds + the limit, the parser stops and fails with `XML_ERROR_ENTITY_EXPANSION`. + A value of 0 disables the protection. + + Supported range + 0 .. UINT_MAX + Default + 8 MiB + + XML_FEATURE_RESET_DTD + Reset all DTD information after the <!DOCTYPE> block has been parsed. When + the flag is set (default: false) all DTD information after the + endDoctypeDeclHandler has been called. The flag can be set inside the + endDoctypeDeclHandler. Without DTD information any entity reference in + the document body leads to `XML_ERROR_UNDEFINED_ENTITY`. 
+ + Supported range + 0, 1 + Default + 0 + + + How to avoid XML vulnerabilities + ================================ + + Best practices + -------------- + + * Don't allow DTDs + * Don't expand entities + * Don't resolve externals + * Limit parse depth + * Limit total input size + * Limit parse time + * Favor a SAX or iterparse-like parser for potential large data + * Validate and properly quote arguments to XSL transformations and + XPath queries + * Don't use XPath expression from untrusted sources + * Don't apply XSL transformations that come untrusted sources + + (based on Brad Hill's `Attacking XML Security`_) + + + Other things to consider + ======================== + + XML, XML parsers and processing libraries have more features and possible + issue that could lead to DoS vulnerabilities or security exploits in + applications. I have compiled an incomplete list of theoretical issues that + need further research and more attention. The list is deliberately pessimistic + and a bit paranoid, too. It contains things that might go wrong under daffy + circumstances. + + + attribute blowup / hash collision attack + ---------------------------------------- + + XML parsers may use an algorithm with quadratic runtime O(n :sup:`2`) to + handle attributes and namespaces. If it uses hash tables (dictionaries) to + store attributes and namespaces the implementation may be vulnerable to + hash collision attacks, thus reducing the performance to O(n :sup:`2`) again. + In either case an attacker is able to forge a denial of service attack with + an XML document that contains thousands upon thousands of attributes in + a single node. + + I haven't researched yet if expat, pyexpat or libxml2 are vulnerable. + + + decompression bomb + ------------------ + + The issue of decompression bombs (aka `ZIP bomb`_) apply to all XML libraries + that can parse compressed XML stream like gzipped HTTP streams or LZMA-ed + files. 
For an attacker it can reduce the amount of transmitted data by three + magnitudes or more. Gzip is able to compress 1 GiB zeros to roughly 1 MB, + lzma is even better:: + + $ dd if=/dev/zero bs=1M count=1024 | gzip > zeros.gz + $ dd if=/dev/zero bs=1M count=1024 | lzma -z > zeros.xy + $ ls -sh zeros.* + 1020K zeros.gz + 148K zeros.xy + + None of Python's standard XML libraries decompress streams except for + ``xmlrpclib``. The module is vulnerable <http://bugs.python.org/issue16043> + to decompression bombs. + + lxml can load and process compressed data through libxml2 transparently. + libxml2 can handle even very large blobs of compressed data efficiently + without using too much memory. But it doesn't protect applications from + decompression bombs. A carefully written SAX or iterparse-like approach can + be safe. + + + Processing Instruction + ---------------------- + + `PI`_'s like:: + + <?xml-stylesheet type="text/xsl" href="style.xsl"?> + + may impose more threats for XML processing. It depends if and how a + processor handles processing instructions. The issue of URL retrieval with + network or local file access apply to processing instructions, too. + + + Other DTD features + ------------------ + + `DTD`_ has more features like ``<!NOTATION>``. I haven't researched how + these features may be a security threat. + + + XPath + ----- + + XPath statements may introduce DoS vulnerabilities. Code should never execute + queries from untrusted sources. An attacker may also be able to create a XML + document that makes certain XPath queries costly or resource hungry. + + + XPath injection attacks + ----------------------- + + XPath injeciton attacks pretty much work like SQL injection attacks. + Arguments to XPath queries must be quoted and validated properly, especially + when they are taken from the user. The page `Avoid the dangers of XPath injection`_ + list some ramifications of XPath injections. + + Python's standard library doesn't have XPath support. 
Lxml supports + parameterized XPath queries which does proper quoting. You just have to use + its xpath() method correctly:: + + # DON'T + >>> tree.xpath("/tag[@id='%s']" % value) + + # instead do + >>> tree.xpath("/tag[@id=$tagid]", tagid=name) + + + XInclude + -------- + + `XML Inclusion`_ is another way to load and include external files:: + + <root xmlns:xi="http://www.w3.org/2001/XInclude"> + <xi:include href="filename.txt" parse="text" /> + </root> + + This feature should be disabled when XML files from an untrusted source are + processed. Some Python XML libraries and libxml2 support XInclude but don't + have an option to sandbox inclusion and limit it to allowed directories. + + + XMLSchema location + ------------------ + + A validating XML parser may download schema files from the information in a + ``xsi:schemaLocation`` attribute. + + :: + + <ead xmlns="urn:isbn:1-931666-22-9" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd"> + </ead> + + + XSL Transformation + ------------------ + + You should keep in mind that XSLT is a Turing complete language. Never + process XSLT code from unknown or untrusted source! XSLT processors may + allow you to interact with external resources in ways you can't even imagine. + Some processors even support extensions that allow read/write access to file + system, access to JRE objects or scripting with Jython. 
+ + Example from `Attacking XML Security`_ for Xalan-J:: + + <xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:rt="http://xml.apache.org/xalan/java/java.lang.Runtime" + xmlns:ob="http://xml.apache.org/xalan/java/java.lang.Object" + exclude-result-prefixes= "rt ob"> + <xsl:template match="/"> + <xsl:variable name="runtimeObject" select="rt:getRuntime()"/> + <xsl:variable name="command" + select="rt:exec($runtimeObject, 'c:\Windows\system32\cmd.exe')"/> + <xsl:variable name="commandAsString" select="ob:toString($command)"/> + <xsl:value-of select="$commandAsString"/> + </xsl:template> + </xsl:stylesheet> + + + Related CVEs + ============ + + CVE-2013-1664 + Unrestricted entity expansion induces DoS vulnerabilities in Python XML + libraries (XML bomb) + + CVE-2013-1665 + External entity expansion in Python XML libraries inflicts potential + security flaws and DoS vulnerabilities + + + Other languages / frameworks + ============================= + + Several other programming languages and frameworks are vulnerable as well. A + couple of them are affected by the fact that libxml2 up to 2.9.0 has no + protection against quadratic blowup attacks. Most of them have potential + dangerous default settings for entity expansion and external entities, too. + + Perl + ---- + + Perl's XML::Simple is vulnerable to quadratic entity expansion and external + entity expansion (both local and remote). + + + Ruby + ---- + + Ruby's REXML document parser is vulnerable to entity expansion attacks + (both quadratic and exponential) but it doesn't do external entity + expansion by default. In order to counteract entity expansion you have to + disable the feature:: + + REXML::Document.entity_expansion_limit = 0 + + libxml-ruby and hpricot don't expand entities in their default configuration. + + + PHP + --- + + PHP's SimpleXML API is vulnerable to quadratic entity expansion and loads + entites from local and remote resources. 
The option ``LIBXML_NONET`` disables + network access but still allows local file access. ``LIBXML_NOENT`` seems to + have no effect on entity expansion in PHP 5.4.6. + + + C# / .NET / Mono + ---------------- + + Information in `XML DoS and Defenses (MSDN)`_ suggests that .NET is + vulnerable with its default settings. The article contains code snippets + showing how to create a secure XML reader:: + + XmlReaderSettings settings = new XmlReaderSettings(); + settings.ProhibitDtd = false; + settings.MaxCharactersFromEntities = 1024; + settings.XmlResolver = null; + XmlReader reader = XmlReader.Create(stream, settings); + + + Java + ---- + + Untested. The documentation of Xerces and its `Xerces SecurityMananger`_ + sounds like Xerces is also vulnerable to billion laugh attacks with its + default settings. It also does entity resolving when an + ``org.xml.sax.EntityResolver`` is configured. I'm not yet sure about the + default setting here. + + Java specialists suggest using a custom builder factory:: + + DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); + builderFactory.setXIncludeAware(false); + builderFactory.setExpandEntityReferences(false); + builderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + // either + builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + // or if you need DTDs + builderFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); + builderFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); + builderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + builderFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); + + + TODO + ==== + + * DOM: Use xml.dom.xmlbuilder options for entity handling + * SAX: take feature_external_ges and feature_external_pes (?) 
into account + * test experimental monkey patching of stdlib modules + * improve documentation + + + License + ======= + + Copyright (c) 2013-2017 by Christian Heimes <christian@python.org> + + Licensed to PSF under a Contributor Agreement. + + See http://www.python.org/psf/license for licensing details. + + + Acknowledgements + ================ + + Brett Cannon (Python Core developer) + review and code cleanup + + Antoine Pitrou (Python Core developer) + code review + + Aaron Patterson, Ben Murphy and Michael Koziarski (Ruby community) + Many thanks to Aaron, Ben and Michael from the Ruby community for their + report and assistance. + + Thierry Carrez (OpenStack) + Many thanks to Thierry for his report to the Python Security Response + Team on behalf of the OpenStack security team. + + Carl Meyer (Django) + Many thanks to Carl for his report to PSRT on behalf of the Django security + team. + + Daniel Veillard (libxml2) + Many thanks to Daniel for his insight and assistance with libxml2. + + semantics GmbH (http://www.semantics.de/) + Many thanks to my employer semantics for letting me work on the issue + during working hours as part of semantics's open source initiative. + + + References + ========== + + * `XML DoS and Defenses (MSDN)`_ + * `Billion Laughs`_ on Wikipedia + * `ZIP bomb`_ on Wikipedia + * `Configure SAX parsers for secure processing`_ + * `Testing for XML Injection`_ + + .. _defusedxml package: https://bitbucket.org/tiran/defusedxml + .. _defusedxml on PyPI: https://pypi.python.org/pypi/defusedxml + .. _defusedexpat package: https://bitbucket.org/tiran/defusedexpat + .. _defusedexpat on PyPI: https://pypi.python.org/pypi/defusedexpat + .. _modified expat: https://bitbucket.org/tiran/expat + .. _expat parser: http://expat.sourceforge.net/ + .. _Attacking XML Security: https://www.isecpartners.com/media/12976/iSEC-HILL-Attacking-XML-Security-bh07.pdf + .. _Billion Laughs: http://en.wikipedia.org/wiki/Billion_laughs + .. 
_XML DoS and Defenses (MSDN): http://msdn.microsoft.com/en-us/magazine/ee335713.aspx + .. _ZIP bomb: http://en.wikipedia.org/wiki/Zip_bomb + .. _DTD: http://en.wikipedia.org/wiki/Document_Type_Definition + .. _PI: https://en.wikipedia.org/wiki/Processing_Instruction + .. _Avoid the dangers of XPath injection: http://www.ibm.com/developerworks/xml/library/x-xpathinjection/index.html + .. _Configure SAX parsers for secure processing: http://www.ibm.com/developerworks/xml/library/x-tipcfsx/index.html + .. _Testing for XML Injection: https://www.owasp.org/index.php/Testing_for_XML_Injection_(OWASP-DV-008) + .. _Xerces SecurityMananger: http://xerces.apache.org/xerces2-j/javadocs/xerces2/org/apache/xerces/util/SecurityManager.html + .. _XML Inclusion: http://www.w3.org/TR/xinclude/#include_element + + Changelog + ========= + + defusedxml 0.5.0 + ---------------- + + *Release date: 07-Feb-2017* + + - No changes + + defusedxml 0.5.0.rc1 + -------------------- + + *Release date: 28-Jan-2017* + + - Add compatibility with Python 3.6 + - Drop support for Python 2.6, 3.1, 3.2, 3.3 + - Fix lxml tests (XMLSyntaxError: Detected an entity reference loop) + + + defusedxml 0.4.1 + ---------------- + + *Release date: 28-Mar-2013* + + - Add more demo exploits, e.g. python_external.py and Xalan XSLT demos. + - Improved documentation. + + + defusedxml 0.4 + -------------- + + *Release date: 25-Feb-2013* + + - As per http://seclists.org/oss-sec/2013/q1/340 please REJECT + CVE-2013-0278, CVE-2013-0279 and CVE-2013-0280 and use CVE-2013-1664, + CVE-2013-1665 for OpenStack/etc. + - Add missing parser_list argument to sax.make_parser(). The argument is + ignored, though. (thanks to Florian Apolloner) + - Add demo exploit for external entity attack on Python's SAX parser, XML-RPC + and WebDAV. 
+ + + defusedxml 0.3 + -------------- + + *Release date: 19-Feb-2013* + + - Improve documentation + + + defusedxml 0.2 + -------------- + + *Release date: 15-Feb-2013* + + - Rename ExternalEntitiesForbidden to ExternalReferenceForbidden + - Rename defusedxml.lxml.check_dtd() to check_docinfo() + - Unify argument names in callbacks + - Add arguments and formatted representation to exceptions + - Add forbid_external argument to all functions and classes + - More tests + - LOTS of documentation + - Add example code for other languages (Ruby, Perl, PHP) and parsers (Genshi) + - Add protection against XML and gzip attacks to xmlrpclib + + defusedxml 0.1 + -------------- + + *Release date: 08-Feb-2013* + + - Initial and internal release for PSRT review + +Keywords: xml bomb DoS +Platform: all +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Python Software Foundation License +Classifier: Natural Language :: English +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Topic :: Text Processing :: Markup :: XML diff --git a/defusedxml.egg-info/SOURCES.txt b/defusedxml.egg-info/SOURCES.txt new file mode 100644 index 0000000..b80c2b7 --- /dev/null +++ b/defusedxml.egg-info/SOURCES.txt @@ -0,0 +1,44 @@ +CHANGES.txt +LICENSE +MANIFEST.in +Makefile +README.html +README.txt +setup.cfg +setup.py +tests.py +void.css +defusedxml/ElementTree.py +defusedxml/__init__.py +defusedxml/cElementTree.py +defusedxml/common.py +defusedxml/expatbuilder.py +defusedxml/expatreader.py +defusedxml/lxml.py +defusedxml/minidom.py +defusedxml/pulldom.py +defusedxml/sax.py +defusedxml/xmlrpc.py 
+defusedxml.egg-info/PKG-INFO +defusedxml.egg-info/SOURCES.txt +defusedxml.egg-info/dependency_links.txt +defusedxml.egg-info/top_level.txt +other/README.txt +other/exploit_webdav.py +other/exploit_xmlrpc.py +other/perl.pl +other/php.php +other/python_external.py +other/python_genshi.py +other/ruby-hpricot.rb +other/ruby-libxml.rb +other/ruby-rexml.rb +xmltestdata/cyclic.xml +xmltestdata/dtd.xml +xmltestdata/external.xml +xmltestdata/external_file.xml +xmltestdata/quadratic.xml +xmltestdata/simple-ns.xml +xmltestdata/simple.xml +xmltestdata/xmlbomb.xml +xmltestdata/xmlbomb2.xml
\ No newline at end of file diff --git a/defusedxml.egg-info/dependency_links.txt b/defusedxml.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/defusedxml.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/defusedxml.egg-info/top_level.txt b/defusedxml.egg-info/top_level.txt new file mode 100644 index 0000000..36969f2 --- /dev/null +++ b/defusedxml.egg-info/top_level.txt @@ -0,0 +1 @@ +defusedxml diff --git a/defusedxml/ElementTree.py b/defusedxml/ElementTree.py index a2f1f58..41b2ea8 100644 --- a/defusedxml/ElementTree.py +++ b/defusedxml/ElementTree.py @@ -8,26 +8,27 @@ from __future__ import print_function, absolute_import import sys -from .common import PY3, PY26, PY31 +from xml.etree.ElementTree import TreeBuilder as _TreeBuilder +from xml.etree.ElementTree import parse as _parse +from xml.etree.ElementTree import tostring + +from .common import PY3 + + if PY3: import importlib else: from xml.etree.ElementTree import XMLParser as _XMLParser from xml.etree.ElementTree import iterparse as _iterparse - if PY26: - from xml.parsers.expat import ExpatError as ParseError - else: - from xml.etree.ElementTree import ParseError - _IterParseIterator = None -from xml.etree.ElementTree import TreeBuilder as _TreeBuilder -from xml.etree.ElementTree import parse as _parse -from xml.etree.ElementTree import tostring + from xml.etree.ElementTree import ParseError + from .common import (DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, _generate_etree_functions) __origin__ = "xml.etree.ElementTree" + def _get_py3_cls(): """Python 3.3 hides the pure Python code but defusedxml requires it. 
@@ -49,32 +50,26 @@ def _get_py3_cls(): _XMLParser = pure_pymod.XMLParser _iterparse = pure_pymod.iterparse - if PY31: - _IterParseIterator = None - from xml.parsers.expat import ExpatError as ParseError - else: - _IterParseIterator = pure_pymod._IterParseIterator - ParseError = pure_pymod.ParseError + ParseError = pure_pymod.ParseError + + return _XMLParser, _iterparse, ParseError - return _XMLParser, _iterparse, _IterParseIterator, ParseError if PY3: - _XMLParser, _iterparse, _IterParseIterator, ParseError = _get_py3_cls() + _XMLParser, _iterparse, ParseError = _get_py3_cls() class DefusedXMLParser(_XMLParser): + def __init__(self, html=0, target=None, encoding=None, forbid_dtd=False, forbid_entities=True, forbid_external=True): - if PY26 or PY31: - _XMLParser.__init__(self, html, target) - else: - # Python 2.x old style class - _XMLParser.__init__(self, html, target, encoding) + # Python 2.x old style class + _XMLParser.__init__(self, html, target, encoding) self.forbid_dtd = forbid_dtd self.forbid_entities = forbid_entities self.forbid_external = forbid_external - if PY3 and not PY31: + if PY3: parser = self.parser else: parser = self._parser @@ -108,5 +103,10 @@ class DefusedXMLParser(_XMLParser): XMLTreeBuilder = XMLParse = DefusedXMLParser parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser, - _TreeBuilder, _IterParseIterator, _parse, _iterparse) + _TreeBuilder, _parse, + _iterparse) XML = fromstring + + +__all__ = ['XML', 'XMLParse', 'XMLTreeBuilder', 'fromstring', 'iterparse', + 'parse', 'tostring'] diff --git a/defusedxml/__init__.py b/defusedxml/__init__.py index 98a7f14..590a5a9 100644 --- a/defusedxml/__init__.py +++ b/defusedxml/__init__.py @@ -11,6 +11,7 @@ from .common import (DefusedXmlException, DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, NotSupportedError, _apply_defusing) + def defuse_stdlib(): """Monkey patch and defuse all stdlib packages @@ -38,5 +39,7 @@ def defuse_stdlib(): return defused -__version__ 
= "0.4.1" +__version__ = "0.5.0" +__all__ = ['DefusedXmlException', 'DTDForbidden', 'EntitiesForbidden', + 'ExternalReferenceForbidden', 'NotSupportedError'] diff --git a/defusedxml/cElementTree.py b/defusedxml/cElementTree.py index 0e37c8f..cc13689 100644 --- a/defusedxml/cElementTree.py +++ b/defusedxml/cElementTree.py @@ -13,13 +13,18 @@ from xml.etree.cElementTree import tostring # iterparse from ElementTree! from xml.etree.ElementTree import iterparse as _iterparse -from .ElementTree import DefusedXMLParser, _IterParseIterator +from .ElementTree import DefusedXMLParser from .common import _generate_etree_functions __origin__ = "xml.etree.cElementTree" + XMLTreeBuilder = XMLParse = DefusedXMLParser parse, iterparse, fromstring = _generate_etree_functions(DefusedXMLParser, - _TreeBuilder, _IterParseIterator, _parse, _iterparse) + _TreeBuilder, _parse, + _iterparse) XML = fromstring + +__all__ = ['XML', 'XMLParse', 'XMLTreeBuilder', 'fromstring', 'iterparse', + 'parse', 'tostring'] diff --git a/defusedxml/common.py b/defusedxml/common.py index 5e5f8a2..668b609 100644 --- a/defusedxml/common.py +++ b/defusedxml/common.py @@ -6,16 +6,14 @@ """Common constants, exceptions and helpe functions """ import sys -from types import MethodType PY3 = sys.version_info[0] == 3 -PY26 = sys.version_info[:2] == (2, 6) -PY31 = sys.version_info[:2] == (3, 1) class DefusedXmlException(ValueError): """Base exception """ + def __repr__(self): return str(self) @@ -23,6 +21,7 @@ class DefusedXmlException(ValueError): class DTDForbidden(DefusedXmlException): """Document type definition is forbidden """ + def __init__(self, name, sysid, pubid): super(DTDForbidden, self).__init__() self.name = name @@ -37,6 +36,7 @@ class DTDForbidden(DefusedXmlException): class EntitiesForbidden(DefusedXmlException): """Entity definition is forbidden """ + def __init__(self, name, value, base, sysid, pubid, notation_name): super(EntitiesForbidden, self).__init__() self.name = name @@ -54,6 +54,7 @@ class 
EntitiesForbidden(DefusedXmlException): class ExternalReferenceForbidden(DefusedXmlException): """Resolving an external reference is forbidden """ + def __init__(self, context, base, sysid, pubid): super(ExternalReferenceForbidden, self).__init__() self.context = context @@ -85,7 +86,7 @@ def _apply_defusing(defused_mod): def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, - _IterParseIterator, _parse, _iterparse): + _parse, _iterparse): """Factory for functions needed by etree, dependent on whether cElementTree or ElementTree is used.""" @@ -98,57 +99,14 @@ def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, forbid_external=forbid_external) return _parse(source, parser) - if PY26 or PY31: - def bind(xmlparser, funcname, hookname): - func = getattr(DefusedXMLParser, funcname) - if PY26: - # unbound -> function - func = func.__func__ - method = MethodType(func, xmlparser, xmlparser.__class__) - else: - method = MethodType(func, xmlparser) - # set hook - setattr(xmlparser._parser, hookname, method) - - def iterparse(source, events=None, forbid_dtd=False, - forbid_entities=True, forbid_external=True): - it = _iterparse(source, events) - xmlparser = it._parser - if forbid_dtd: - bind(xmlparser, "defused_start_doctype_decl", - "StartDoctypeDeclHandler") - if forbid_entities: - bind(xmlparser, "defused_entity_decl", - "EntityDeclHandler") - bind(xmlparser, "defused_unparsed_entity_decl", - "UnparsedEntityDeclHandler") - if forbid_external: - bind(xmlparser, "defused_external_entity_ref_handler", - "ExternalEntityRefHandler") - return it - elif PY3: - def iterparse(source, events=None, parser=None, forbid_dtd=False, - forbid_entities=True, forbid_external=True): - close_source = False - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - if not parser: - parser = DefusedXMLParser(target=_TreeBuilder(), - forbid_dtd=forbid_dtd, - forbid_entities=forbid_entities, - forbid_external=forbid_external) - return 
_IterParseIterator(source, events, parser, close_source) - else: - # Python 2.7 - def iterparse(source, events=None, parser=None, forbid_dtd=False, - forbid_entities=True, forbid_external=True): - if parser is None: - parser = DefusedXMLParser(target=_TreeBuilder(), - forbid_dtd=forbid_dtd, - forbid_entities=forbid_entities, - forbid_external=forbid_external) - return _iterparse(source, events, parser) + def iterparse(source, events=None, parser=None, forbid_dtd=False, + forbid_entities=True, forbid_external=True): + if parser is None: + parser = DefusedXMLParser(target=_TreeBuilder(), + forbid_dtd=forbid_dtd, + forbid_entities=forbid_entities, + forbid_external=forbid_external) + return _iterparse(source, events, parser) def fromstring(text, forbid_dtd=False, forbid_entities=True, forbid_external=True): @@ -159,5 +117,4 @@ def _generate_etree_functions(DefusedXMLParser, _TreeBuilder, parser.feed(text) return parser.close() - return parse, iterparse, fromstring diff --git a/defusedxml/expatbuilder.py b/defusedxml/expatbuilder.py index d81fd2f..0eb6b91 100644 --- a/defusedxml/expatbuilder.py +++ b/defusedxml/expatbuilder.py @@ -49,7 +49,7 @@ class DefusedExpatBuilder(_ExpatBuilder): if self.forbid_dtd: parser.StartDoctypeDeclHandler = self.defused_start_doctype_decl if self.forbid_entities: - #if self._options.entities: + # if self._options.entities: parser.EntityDeclHandler = self.defused_entity_decl parser.UnparsedEntityDeclHandler = self.defused_unparsed_entity_decl if self.forbid_external: diff --git a/defusedxml/lxml.py b/defusedxml/lxml.py index 94b497c..7f3ee0b 100644 --- a/defusedxml/lxml.py +++ b/defusedxml/lxml.py @@ -40,7 +40,8 @@ class RestrictedElement(_etree.ElementBase): return self._filter(iterator) def iterchildren(self, tag=None, reversed=False): - iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed) + iterator = super(RestrictedElement, self).iterchildren( + tag=tag, reversed=reversed) return self._filter(iterator) 
def iter(self, tag=None, *tags): @@ -48,11 +49,13 @@ class RestrictedElement(_etree.ElementBase): return self._filter(iterator) def iterdescendants(self, tag=None, *tags): - iterator = super(RestrictedElement, self).iterdescendants(tag=tag, *tags) + iterator = super(RestrictedElement, + self).iterdescendants(tag=tag, *tags) return self._filter(iterator) def itersiblings(self, tag=None, preceding=False): - iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding) + iterator = super(RestrictedElement, self).itersiblings( + tag=tag, preceding=preceding) return self._filter(iterator) def getchildren(self): @@ -69,8 +72,8 @@ class GlobalParserTLS(threading.local): """ parser_config = { 'resolve_entities': False, - #'remove_comments': True, - #'remove_pis': True, + # 'remove_comments': True, + # 'remove_pis': True, } element_class = RestrictedElement @@ -142,6 +145,7 @@ def fromstring(text, parser=None, base_url=None, forbid_dtd=False, check_docinfo(elementtree, forbid_dtd, forbid_entities) return rootelement + XML = fromstring diff --git a/defusedxml/minidom.py b/defusedxml/minidom.py index 1ce6946..0fd8684 100644 --- a/defusedxml/minidom.py +++ b/defusedxml/minidom.py @@ -13,6 +13,7 @@ from . 
import pulldom as _pulldom __origin__ = "xml.dom.minidom" + def parse(file, parser=None, bufsize=None, forbid_dtd=False, forbid_entities=True, forbid_external=True): """Parse a file into a DOM by filename or file object.""" @@ -22,17 +23,18 @@ def parse(file, parser=None, bufsize=None, forbid_dtd=False, forbid_external=forbid_external) else: return _do_pulldom_parse(_pulldom.parse, (file,), - {'parser': parser, 'bufsize': bufsize, - 'forbid_dtd': forbid_dtd, 'forbid_entities': forbid_entities, - 'forbid_external': forbid_external}) + {'parser': parser, 'bufsize': bufsize, + 'forbid_dtd': forbid_dtd, 'forbid_entities': forbid_entities, + 'forbid_external': forbid_external}) + def parseString(string, parser=None, forbid_dtd=False, forbid_entities=True, forbid_external=True): """Parse a file into a DOM from a string.""" if parser is None: return _expatbuilder.parseString(string, forbid_dtd=forbid_dtd, - forbid_entities=forbid_entities, - forbid_external=forbid_external) + forbid_entities=forbid_entities, + forbid_external=forbid_external) else: return _do_pulldom_parse(_pulldom.parseString, (string,), {'parser': parser, 'forbid_dtd': forbid_dtd, diff --git a/defusedxml/sax.py b/defusedxml/sax.py index 4305df0..534d0ca 100644 --- a/defusedxml/sax.py +++ b/defusedxml/sax.py @@ -14,6 +14,7 @@ from . 
import expatreader __origin__ = "xml.sax" + def parse(source, handler, errorHandler=_ErrorHandler(), forbid_dtd=False, forbid_entities=True, forbid_external=True): parser = make_parser() @@ -43,5 +44,6 @@ def parseString(string, handler, errorHandler=_ErrorHandler(), inpsrc.setByteStream(BytesIO(string)) parser.parse(inpsrc) + def make_parser(parser_list=[]): return expatreader.create_parser() diff --git a/defusedxml/xmlrpc.py b/defusedxml/xmlrpc.py index 0829916..2a456e6 100644 --- a/defusedxml/xmlrpc.py +++ b/defusedxml/xmlrpc.py @@ -11,25 +11,23 @@ from __future__ import print_function, absolute_import import io -from .common import (DTDForbidden, EntitiesForbidden, - ExternalReferenceForbidden, PY3, PY31, PY26) +from .common import ( + DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, PY3) if PY3: __origin__ = "xmlrpc.client" from xmlrpc.client import ExpatParser from xmlrpc import client as xmlrpc_client from xmlrpc import server as xmlrpc_server - if not PY31: - from xmlrpc.client import gzip_decode as _orig_gzip_decode - from xmlrpc.client import GzipDecodedResponse as _OrigGzipDecodedResponse + from xmlrpc.client import gzip_decode as _orig_gzip_decode + from xmlrpc.client import GzipDecodedResponse as _OrigGzipDecodedResponse else: __origin__ = "xmlrpclib" from xmlrpclib import ExpatParser import xmlrpclib as xmlrpc_client xmlrpc_server = None - if not PY26: - from xmlrpclib import gzip_decode as _orig_gzip_decode - from xmlrpclib import GzipDecodedResponse as _OrigGzipDecodedResponse + from xmlrpclib import gzip_decode as _orig_gzip_decode + from xmlrpclib import GzipDecodedResponse as _OrigGzipDecodedResponse try: import gzip @@ -41,7 +39,8 @@ except ImportError: # Also used to limit maximum amount of gzip decoded data in order to prevent # decompression bombs # A value of -1 or smaller disables the limit -MAX_DATA = 30 * 1024 * 1024 # 30 MB +MAX_DATA = 30 * 1024 * 1024 # 30 MB + def defused_gzip_decode(data, limit=None): """gzip encoded data 
-> unencoded data @@ -55,7 +54,7 @@ def defused_gzip_decode(data, limit=None): f = io.BytesIO(data) gzf = gzip.GzipFile(mode="rb", fileobj=f) try: - if limit < 0: # no limit + if limit < 0: # no limit decoded = gzf.read() else: decoded = gzf.read(limit + 1) @@ -72,13 +71,14 @@ class DefusedGzipDecodedResponse(gzip.GzipFile if gzip else object): """a file-like object to decode a response encoded with the gzip method, as described in RFC 1952. """ + def __init__(self, response, limit=None): - #response doesn't support tell() and read(), required by - #GzipFile + # response doesn't support tell() and read(), required by + # GzipFile if not gzip: raise NotImplementedError self.limit = limit = limit if limit is not None else MAX_DATA - if limit < 0: # no limit + if limit < 0: # no limit data = response.read() self.readlength = None else: @@ -107,6 +107,7 @@ class DefusedGzipDecodedResponse(gzip.GzipFile if gzip else object): class DefusedExpatParser(ExpatParser): + def __init__(self, target, forbid_dtd=False, forbid_entities=True, forbid_external=True): ExpatParser.__init__(self, target) @@ -142,18 +143,14 @@ class DefusedExpatParser(ExpatParser): def monkey_patch(): xmlrpc_client.FastParser = DefusedExpatParser - if PY26 or PY31: - # Python 2.6 and 3.1 have no gzip support in xmlrpc - return xmlrpc_client.GzipDecodedResponse = DefusedGzipDecodedResponse xmlrpc_client.gzip_decode = defused_gzip_decode if xmlrpc_server: xmlrpc_server.gzip_decode = defused_gzip_decode + def unmonkey_patch(): xmlrpc_client.FastParser = None - if PY26 or PY31: - return xmlrpc_client.GzipDecodedResponse = _OrigGzipDecodedResponse xmlrpc_client.gzip_decode = _orig_gzip_decode if xmlrpc_server: diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8b48b64 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,12 @@ +[bdist_wheel] +universal = 1 + +[aliases] +packages = clean --all egg_info bdist_wheel sdist --format=gztar +release = packages register upload + +[egg_info] +tag_build = 
+tag_date = 0 +tag_svn_revision = 0 + @@ -1,17 +1,23 @@ #!/usr/bin/env python from __future__ import absolute_import import sys -from distutils.core import setup, Command +from distutils.core import Command import subprocess +from setuptools import setup + import defusedxml + class PyTest(Command): user_options = [] + def initialize_options(self): pass + def finalize_options(self): pass + def run(self): errno = subprocess.call([sys.executable, "tests.py"]) raise SystemExit(errno) @@ -23,6 +29,7 @@ with open("README.txt") as f: with open("CHANGES.txt") as f: long_description.append(f.read()) + setup( name="defusedxml", version=defusedxml.__version__, @@ -32,8 +39,8 @@ setup( author_email="christian@python.org", maintainer="Christian Heimes", maintainer_email="christian@python.org", - url="https://bitbucket.org/tiran/defusedxml", - download_url="http://pypi.python.org/pypi/defusedxml", + url="https://github.com/tiran/defusedxml", + download_url="https://pypi.python.org/pypi/defusedxml", keywords="xml bomb DoS", platforms="all", license="PSFL", @@ -46,13 +53,11 @@ setup( "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.1", - "Programming Language :: Python :: 3.2", - "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", "Topic :: Text Processing :: Markup :: XML", ], ) - @@ -3,7 +3,6 @@ import os import sys import unittest import io -import re from xml.sax.saxutils import XMLGenerator from xml.sax import SAXParseException @@ -11,9 +10,9 @@ from pyexpat import ExpatError from defusedxml import cElementTree, ElementTree, minidom, pulldom, sax, xmlrpc from defusedxml import defuse_stdlib -from defusedxml import (DefusedXmlException, 
DTDForbidden, EntitiesForbidden, +from defusedxml import (DTDForbidden, EntitiesForbidden, ExternalReferenceForbidden, NotSupportedError) -from defusedxml.common import PY3, PY26, PY31 +from defusedxml.common import PY3 try: @@ -39,39 +38,6 @@ os.environ["http_proxy"] = "http://127.0.9.1:9" os.environ["https_proxy"] = os.environ["http_proxy"] os.environ["ftp_proxy"] = os.environ["http_proxy"] -if PY26 or PY31: - class _AssertRaisesContext(object): - def __init__(self, expected, test_case, expected_regexp=None): - self.expected = expected - self.failureException = test_case.failureException - self.expected_regexp = expected_regexp - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, tb): - if exc_type is None: - try: - exc_name = self.expected.__name__ - except AttributeError: - exc_name = str(self.expected) - raise self.failureException( - "{0} not raised".format(exc_name)) - if not issubclass(exc_type, self.expected): - # let unexpected exceptions pass through - return False - self.exception = exc_value # store for later retrieval - if self.expected_regexp is None: - return True - - expected_regexp = self.expected_regexp - if isinstance(expected_regexp, basestring): - expected_regexp = re.compile(expected_regexp) - if not expected_regexp.search(str(exc_value)): - raise self.failureException('"%s" does not match "%s"' % - (expected_regexp.pattern, str(exc_value))) - return True - class DefusedTestCase(unittest.TestCase): @@ -90,21 +56,6 @@ class DefusedTestCase(unittest.TestCase): xml_bomb2 = os.path.join(HERE, "xmltestdata", "xmlbomb2.xml") xml_cyclic = os.path.join(HERE, "xmltestdata", "cyclic.xml") - if PY26 or PY31: - # old Python versions don't have these useful test methods - def assertRaises(self, excClass, callableObj=None, *args, **kwargs): - context = _AssertRaisesContext(excClass, self) - if callableObj is None: - return context - with context: - callableObj(*args, **kwargs) - - def assertIn(self, member, container, 
msg=None): - if member not in container: - standardMsg = '%s not found in %s' % (repr(member), - repr(container)) - self.fail(self._formatMessage(msg, standardMsg)) - def get_content(self, xmlfile): mode = "rb" if self.content_binary else "r" with io.open(xmlfile, mode) as f: @@ -189,7 +140,6 @@ class BaseTests(DefusedTestCase): self.assertRaises(DTDForbidden, self.iterparse, self.xml_dtd, forbid_dtd=True) - def test_dtd_with_external_ref(self): if self.dtd_external_ref: self.assertRaises(self.external_ref_exception, self.parse, @@ -220,9 +170,8 @@ class BaseTests(DefusedTestCase): class TestDefusedElementTree(BaseTests): module = ElementTree - - ## etree doesn't do external ref lookup - #external_ref_exception = ElementTree.ParseError + # etree doesn't do external ref lookup + # external_ref_exception = ElementTree.ParseError cyclic_error = ElementTree.ParseError @@ -247,7 +196,6 @@ class TestDefusedMinidom(BaseTests): cyclic_error = ExpatError - iterparse = None def parse(self, xmlfile, **kwargs): @@ -302,10 +250,6 @@ class TestDefusedSax(BaseTests): return result.getvalue() def test_exceptions(self): - if PY26: - # Python 2.6 unittest doesn't support with self.assertRaises() - return - with self.assertRaises(EntitiesForbidden) as ctx: self.parse(self.xml_bomb) msg = "EntitiesForbidden(name='a', system_id=None, public_id=None)" @@ -334,11 +278,17 @@ class TestDefusedLxml(BaseTests): content_binary = True def parse(self, xmlfile, **kwargs): - tree = self.module.parse(xmlfile, **kwargs) + try: + tree = self.module.parse(xmlfile, **kwargs) + except XMLSyntaxError: + self.skipTest("lxml detects entityt reference loop") return self.module.tostring(tree) def parseString(self, xmlstring, **kwargs): - tree = self.module.fromstring(xmlstring, **kwargs) + try: + tree = self.module.fromstring(xmlstring, **kwargs) + except XMLSyntaxError: + self.skipTest("lxml detects entityt reference loop") return self.module.tostring(tree) if not LXML3: @@ -355,8 +305,11 @@ class 
TestDefusedLxml(BaseTests): pass def test_restricted_element1(self): - tree = self.module.parse(self.xml_bomb, forbid_dtd=False, - forbid_entities=False) + try: + tree = self.module.parse(self.xml_bomb, forbid_dtd=False, + forbid_entities=False) + except XMLSyntaxError: + self.skipTest("lxml detects entityt reference loop") root = tree.getroot() self.assertEqual(root.text, None) @@ -370,8 +323,11 @@ class TestDefusedLxml(BaseTests): self.assertEqual(root.getnext(), None) def test_restricted_element2(self): - tree = self.module.parse(self.xml_bomb2, forbid_dtd=False, - forbid_entities=False) + try: + tree = self.module.parse(self.xml_bomb2, forbid_dtd=False, + forbid_entities=False) + except XMLSyntaxError: + self.skipTest("lxml detects entityt reference loop") root = tree.getroot() bomb, tag = root self.assertEqual(root.text, "text") @@ -416,6 +372,7 @@ class TestDefusedLxml(BaseTests): class XmlRpcTarget(object): + def __init__(self): self._data = [] @@ -434,8 +391,10 @@ class XmlRpcTarget(object): def end(self, tag): self._data.append("</%s>" % tag) + class TestXmlRpc(DefusedTestCase): module = xmlrpc + def parse(self, xmlfile, **kwargs): target = XmlRpcTarget() parser = self.module.DefusedExpatParser(target, **kwargs) @@ -459,7 +418,7 @@ class TestXmlRpc(DefusedTestCase): self.assertRaises(DTDForbidden, self.parse, self.xml_dtd, forbid_dtd=True) - #def test_xmlrpc_unpatched(self): + # def test_xmlrpc_unpatched(self): # for fname in (self.xml_external, self.xml_dtd): # print(self.parse_unpatched(fname)) @@ -471,6 +430,7 @@ class TestXmlRpc(DefusedTestCase): class TestDefusedGzip(DefusedTestCase): + def get_gzipped(self, length): f = io.BytesIO() gzf = gzip.GzipFile(mode="wb", fileobj=f) @@ -492,11 +452,11 @@ class TestDefusedGzip(DefusedTestCase): def test_defused_gzip_decode(self): data = self.get_gzipped(4096).getvalue() result = xmlrpc.defused_gzip_decode(data) - self.assertEqual(result, b"d" *4096) + self.assertEqual(result, b"d" * 4096) result = 
xmlrpc.defused_gzip_decode(data, -1) - self.assertEqual(result, b"d" *4096) + self.assertEqual(result, b"d" * 4096) result = xmlrpc.defused_gzip_decode(data, 4096) - self.assertEqual(result, b"d" *4096) + self.assertEqual(result, b"d" * 4096) with self.assertRaises(ValueError): result = xmlrpc.defused_gzip_decode(data, 4095) with self.assertRaises(ValueError): @@ -507,7 +467,7 @@ class TestDefusedGzip(DefusedTestCase): response = self.get_gzipped(4096) data = self.decode_response(response) - self.assertEqual(data, b"d" *4096) + self.assertEqual(data, b"d" * 4096) with self.assertRaises(ValueError): response = self.get_gzipped(4096) @@ -536,6 +496,7 @@ def test_main(): suite.addTests(unittest.makeSuite(TestDefusedGzip)) return suite + if __name__ == "__main__": suite = test_main() result = unittest.TextTestRunner(verbosity=1).run(suite) |