summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sean Whitton <spwhitton@spwhitton.name>, 2021-04-09 10:37:06 -0700
committer: Sean Whitton <spwhitton@spwhitton.name>, 2021-04-09 10:37:06 -0700
commit: 9c24f4b18817b61a721e28a7172d0ab3249a548a (patch)
tree: e5353080bf3a2a37bdef56aa4352a2eb6040d282
parent: bcd65cd56091b71101f716e5531c45e482486c8c (diff)
Cherry pick upstream commit 3f38f73 to fix CVE-2021-29421
-rw-r--r-- debian/changelog 6
-rw-r--r-- debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch 120
-rw-r--r-- debian/patches/series 1
3 files changed, 127 insertions, 0 deletions
diff --git a/debian/changelog b/debian/changelog
index 03426b9..5673b4b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+pikepdf (1.17.3+dfsg-5) UNRELEASED; urgency=medium
+
+ * Cherry pick upstream commit 3f38f73 to fix CVE-2021-29421 (Closes: #986274).
+
+ -- Sean Whitton <spwhitton@spwhitton.name> Fri, 09 Apr 2021 10:34:49 -0700
+
pikepdf (1.17.3+dfsg-4) unstable; urgency=medium
* Cherry-pick upstream commit 7ca375cb to fix another broken text
diff --git a/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch b/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch
new file mode 100644
index 0000000..4a64045
--- /dev/null
+++ b/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch
@@ -0,0 +1,120 @@
+From: "James R. Barlow" <james@purplerock.ca>
+Date: Sat, 27 Mar 2021 00:43:21 -0700
+Subject: Fix XXE vulnerability in XMP metadata parsing
+
+For details:
+https://portswigger.net/web-security/xxe
+
+Reported by: Eric Therond (eric.therond@sonarsource.com) of Sonarsource (https://www.sonarsource.com/)
+
+(cherry picked from commit 3f38f73218e5e782fe411ccbb3b44a793c0b343a)
+---
+ src/pikepdf/_xml.py | 30 ++++++++++++++++++++++++++++++
+ src/pikepdf/models/metadata.py | 10 +++++-----
+ tests/test_metadata.py | 24 ++++++++++++++++++++++++
+ 3 files changed, 59 insertions(+), 5 deletions(-)
+ create mode 100644 src/pikepdf/_xml.py
+
+diff --git a/src/pikepdf/_xml.py b/src/pikepdf/_xml.py
+new file mode 100644
+index 0000000..f0e1c38
+--- /dev/null
++++ b/src/pikepdf/_xml.py
+@@ -0,0 +1,30 @@
++# This Source Code Form is subject to the terms of the Mozilla Public
++# License, v. 2.0. If a copy of the MPL was not distributed with this
++# file, You can obtain one at http://mozilla.org/MPL/2.0/.
++#
++# Copyright (C) 2021, James R. Barlow (https://github.com/jbarlow83/)
++
++
++from typing import IO, Any, AnyStr, Union
++
++from lxml.etree import XMLParser as _UnsafeXMLParser
++from lxml.etree import parse as _parse
++
++
++class _XMLParser(_UnsafeXMLParser):
++ def __init__(self, *args, **kwargs):
++ # Prevent XXE attacks
++ # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755
++ kwargs['resolve_entities'] = False
++ kwargs['no_network'] = True
++ super().__init__(*args, **kwargs)
++
++
++def parse_xml(source: Union[AnyStr, IO[Any]], recover: bool = False):
++ """Wrapper around lxml's parse to provide protection against XXE attacks."""
++
++ parser = _XMLParser(recover=recover, remove_pis=False)
++ return _parse(source, parser=parser)
++
++
++__all__ = ['parse_xml']
+diff --git a/src/pikepdf/models/metadata.py b/src/pikepdf/models/metadata.py
+index e34c2e4..ccd9da7 100644
+--- a/src/pikepdf/models/metadata.py
++++ b/src/pikepdf/models/metadata.py
+@@ -15,10 +15,11 @@ from io import BytesIO
+ from warnings import warn
+
+ from lxml import etree
+-from lxml.etree import QName, XMLParser, XMLSyntaxError, parse
++from lxml.etree import QName, XMLSyntaxError
+
+ from .. import Name, Stream, String
+ from .. import __version__ as pikepdf_version
++from .._xml import parse_xml
+
+ XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
+ XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
+@@ -334,14 +335,13 @@ class PdfMetadata(MutableMapping):
+ data = XMP_EMPTY # on some platforms lxml chokes on empty documents
+
+ def basic_parser(xml):
+- return parse(BytesIO(xml))
++ return parse_xml(BytesIO(xml))
+
+ def strip_illegal_bytes_parser(xml):
+- return parse(BytesIO(re_xml_illegal_bytes.sub(b'', xml)))
++ return parse_xml(BytesIO(re_xml_illegal_bytes.sub(b'', xml)))
+
+ def recovery_parser(xml):
+- parser = XMLParser(recover=True)
+- return parse(BytesIO(xml), parser)
++ return parse_xml(BytesIO(xml), recover=True)
+
+ def replace_with_empty_xmp(_xml=None):
+ log.warning("Error occurred parsing XMP, replacing with empty XMP.")
+diff --git a/tests/test_metadata.py b/tests/test_metadata.py
+index 120b1a9..e9d96a0 100644
+--- a/tests/test_metadata.py
++++ b/tests/test_metadata.py
+@@ -587,3 +587,27 @@ def test_issue_100(trivial):
+ UserWarning, match="no XMP equivalent"
+ ):
+ m.load_from_docinfo({'/AAPL:Example': pikepdf.Array([42])})
++
++
++def test_xxe(trivial, outdir):
++ secret = outdir / 'secret.txt'
++ secret.write_text("This is a secret")
++ trivial.Root.Metadata = Stream(
++ trivial,
++ b"""\
++<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
++<!DOCTYPE rdf:RDF [<!ENTITY xxe SYSTEM "file://%s">]>
++<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image'>
++<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
++<note>
++<to>&xxe;</to>
++<from>xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</from>
++</note>
++</rdf:RDF>
++</x:xmpmeta>
++<?xpacket end='w'?>
++ """
++ % os.fsencode(secret),
++ )
++ with trivial.open_metadata() as m:
++ assert 'This is a secret' not in str(m)
diff --git a/debian/patches/series b/debian/patches/series
index 56b42eb..9f793c8 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -6,3 +6,4 @@ Fix-compatibility-with-pybind11.patch
Fix-externalize_inline_images-for-qpdf-10.1.0.patch
libqpdf-10.1.0-raises-different-exception.patch
Fix-test_tokenfilter_is_abstract.patch
+Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch