diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2021-04-09 10:37:06 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2021-04-09 10:37:06 -0700 |
commit | 9c24f4b18817b61a721e28a7172d0ab3249a548a (patch) | |
tree | e5353080bf3a2a37bdef56aa4352a2eb6040d282 | |
parent | bcd65cd56091b71101f716e5531c45e482486c8c (diff) |
Cherry pick upstream commit 3f38f73 to fix CVE-2021-29421
-rw-r--r-- | debian/changelog | 6 | ||||
-rw-r--r-- | debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch | 120 | ||||
-rw-r--r-- | debian/patches/series | 1 |
3 files changed, 127 insertions, 0 deletions
diff --git a/debian/changelog b/debian/changelog index 03426b9..5673b4b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pikepdf (1.17.3+dfsg-5) UNRELEASED; urgency=medium + + * Cherry pick upstream commit 3f38f73 to fix CVE-2021-29421 (Closes: #986274). + + -- Sean Whitton <spwhitton@spwhitton.name> Fri, 09 Apr 2021 10:34:49 -0700 + pikepdf (1.17.3+dfsg-4) unstable; urgency=medium * Cherry-pick upstream commit 7ca375cb to fix another broken text diff --git a/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch b/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch new file mode 100644 index 0000000..4a64045 --- /dev/null +++ b/debian/patches/Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch @@ -0,0 +1,120 @@ +From: "James R. Barlow" <james@purplerock.ca> +Date: Sat, 27 Mar 2021 00:43:21 -0700 +Subject: Fix XXE vulnerability in XMP metadata parsing + +For details: +https://portswigger.net/web-security/xxe + +Reported by: Eric Therond (eric.therond@sonarsource.com) of Sonarsource (https://www.sonarsource.com/) + +(cherry picked from commit 3f38f73218e5e782fe411ccbb3b44a793c0b343a) +--- + src/pikepdf/_xml.py | 30 ++++++++++++++++++++++++++++++ + src/pikepdf/models/metadata.py | 10 +++++----- + tests/test_metadata.py | 24 ++++++++++++++++++++++++ + 3 files changed, 59 insertions(+), 5 deletions(-) + create mode 100644 src/pikepdf/_xml.py + +diff --git a/src/pikepdf/_xml.py b/src/pikepdf/_xml.py +new file mode 100644 +index 0000000..f0e1c38 +--- /dev/null ++++ b/src/pikepdf/_xml.py +@@ -0,0 +1,30 @@ ++# This Source Code Form is subject to the terms of the Mozilla Public ++# License, v. 2.0. If a copy of the MPL was not distributed with this ++# file, You can obtain one at http://mozilla.org/MPL/2.0/. ++# ++# Copyright (C) 2021, James R. 
Barlow (https://github.com/jbarlow83/) ++ ++ ++from typing import IO, Any, AnyStr, Union ++ ++from lxml.etree import XMLParser as _UnsafeXMLParser ++from lxml.etree import parse as _parse ++ ++ ++class _XMLParser(_UnsafeXMLParser): ++ def __init__(self, *args, **kwargs): ++ # Prevent XXE attacks ++ # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755 ++ kwargs['resolve_entities'] = False ++ kwargs['no_network'] = True ++ super().__init__(*args, **kwargs) ++ ++ ++def parse_xml(source: Union[AnyStr, IO[Any]], recover: bool = False): ++ """Wrapper around lxml's parse to provide protection against XXE attacks.""" ++ ++ parser = _XMLParser(recover=recover, remove_pis=False) ++ return _parse(source, parser=parser) ++ ++ ++__all__ = ['parse_xml'] +diff --git a/src/pikepdf/models/metadata.py b/src/pikepdf/models/metadata.py +index e34c2e4..ccd9da7 100644 +--- a/src/pikepdf/models/metadata.py ++++ b/src/pikepdf/models/metadata.py +@@ -15,10 +15,11 @@ from io import BytesIO + from warnings import warn + + from lxml import etree +-from lxml.etree import QName, XMLParser, XMLSyntaxError, parse ++from lxml.etree import QName, XMLSyntaxError + + from .. import Name, Stream, String + from .. 
import __version__ as pikepdf_version ++from .._xml import parse_xml + + XMP_NS_DC = "http://purl.org/dc/elements/1.1/" + XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/" +@@ -334,14 +335,13 @@ class PdfMetadata(MutableMapping): + data = XMP_EMPTY # on some platforms lxml chokes on empty documents + + def basic_parser(xml): +- return parse(BytesIO(xml)) ++ return parse_xml(BytesIO(xml)) + + def strip_illegal_bytes_parser(xml): +- return parse(BytesIO(re_xml_illegal_bytes.sub(b'', xml))) ++ return parse_xml(BytesIO(re_xml_illegal_bytes.sub(b'', xml))) + + def recovery_parser(xml): +- parser = XMLParser(recover=True) +- return parse(BytesIO(xml), parser) ++ return parse_xml(BytesIO(xml), recover=True) + + def replace_with_empty_xmp(_xml=None): + log.warning("Error occurred parsing XMP, replacing with empty XMP.") +diff --git a/tests/test_metadata.py b/tests/test_metadata.py +index 120b1a9..e9d96a0 100644 +--- a/tests/test_metadata.py ++++ b/tests/test_metadata.py +@@ -587,3 +587,27 @@ def test_issue_100(trivial): + UserWarning, match="no XMP equivalent" + ): + m.load_from_docinfo({'/AAPL:Example': pikepdf.Array([42])}) ++ ++ ++def test_xxe(trivial, outdir): ++ secret = outdir / 'secret.txt' ++ secret.write_text("This is a secret") ++ trivial.Root.Metadata = Stream( ++ trivial, ++ b"""\ ++<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?> ++<!DOCTYPE rdf:RDF [<!ENTITY xxe SYSTEM "file://%s">]> ++<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='Image'> ++<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> ++<note> ++<to>&xxe;</to> ++<from>xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</from> ++</note> ++</rdf:RDF> ++</x:xmpmeta> ++<?xpacket end='w'?> ++ """ ++ % os.fsencode(secret), ++ ) ++ with trivial.open_metadata() as m: ++ assert 'This is a secret' not in str(m) diff --git a/debian/patches/series b/debian/patches/series index 56b42eb..9f793c8 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -6,3 +6,4 @@ 
Fix-compatibility-with-pybind11.patch Fix-externalize_inline_images-for-qpdf-10.1.0.patch libqpdf-10.1.0-raises-different-exception.patch Fix-test_tokenfilter_is_abstract.patch +Fix-XXE-vulnerability-in-XMP-metadata-parsing.patch |