summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r--tests/conftest.py15
-rw-r--r--tests/resources/1biticc.pdfbin0 -> 5661 bytes
-rw-r--r--tests/resources/graph-encrypted.pdfbin293636 -> 296661 bytes
-rw-r--r--tests/resources/pike-flate-jp2.pdfbin0 -> 18471 bytes
-rw-r--r--tests/test_codec.py16
-rw-r--r--tests/test_dictionary.py6
-rw-r--r--tests/test_encrypt.py128
-rw-r--r--tests/test_formxobject.py45
-rw-r--r--tests/test_image_access.py143
-rw-r--r--tests/test_io.py26
-rw-r--r--tests/test_ipython.py3
-rw-r--r--tests/test_metadata.py132
-rw-r--r--tests/test_object.py185
-rw-r--r--tests/test_pages.py72
-rw-r--r--tests/test_parsers.py49
-rw-r--r--tests/test_pdf.py91
-rw-r--r--tests/test_pdfa.py10
-rw-r--r--tests/test_private_pdfs.py16
-rw-r--r--tests/test_refcount.py11
-rw-r--r--tests/test_sanity.py40
20 files changed, 763 insertions, 225 deletions
diff --git a/tests/conftest.py b/tests/conftest.py
index 8a67e83..8887415 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,12 +1,8 @@
import os
import sys
-import platform
-
-pytest_plugins = ['helpers_namespace']
+from pathlib import Path
import pytest
-from pathlib import Path
-from subprocess import Popen, PIPE
if sys.version_info < (3, 4):
@@ -24,5 +20,10 @@ def resources():
@pytest.fixture(scope="function")
-def outdir(tmpdir):
- return Path(str(tmpdir))
+def outdir(tmp_path):
+ return tmp_path
+
+
+@pytest.fixture(scope="function")
+def outpdf(tmp_path):
+ return tmp_path / 'out.pdf'
diff --git a/tests/resources/1biticc.pdf b/tests/resources/1biticc.pdf
new file mode 100644
index 0000000..b33b4bb
--- /dev/null
+++ b/tests/resources/1biticc.pdf
Binary files differ
diff --git a/tests/resources/graph-encrypted.pdf b/tests/resources/graph-encrypted.pdf
index 6e086af..6c9741e 100644
--- a/tests/resources/graph-encrypted.pdf
+++ b/tests/resources/graph-encrypted.pdf
Binary files differ
diff --git a/tests/resources/pike-flate-jp2.pdf b/tests/resources/pike-flate-jp2.pdf
new file mode 100644
index 0000000..c074e69
--- /dev/null
+++ b/tests/resources/pike-flate-jp2.pdf
Binary files differ
diff --git a/tests/test_codec.py b/tests/test_codec.py
new file mode 100644
index 0000000..c101d8c
--- /dev/null
+++ b/tests/test_codec.py
@@ -0,0 +1,16 @@
+import pytest
+
+import pikepdf.codec
+
+
+def test_encode():
+ assert 'abc'.encode('pdfdoc') == b'abc'
+ with pytest.raises(ValueError):
+ '你好'.encode('pdfdoc')
+ assert '你好 world'.encode('pdfdoc', 'replace') == b'?? world'
+ assert '你好 world'.encode('pdfdoc', 'ignore') == b' world'
+
+
+def test_decode():
+ assert b'A'.decode('pdfdoc') == 'A'
+ assert b'\xa0'.decode('pdfdoc') == '€'
diff --git a/tests/test_dictionary.py b/tests/test_dictionary.py
index 5341968..78a558a 100644
--- a/tests/test_dictionary.py
+++ b/tests/test_dictionary.py
@@ -1,6 +1,10 @@
-from pikepdf import Pdf
import pytest
+from pikepdf import Pdf
+
+
+# pylint: disable=redefined-outer-name,pointless-statement,expression-not-assigned
+
@pytest.fixture
def congress(resources):
diff --git a/tests/test_encrypt.py b/tests/test_encrypt.py
new file mode 100644
index 0000000..50b4e8d
--- /dev/null
+++ b/tests/test_encrypt.py
@@ -0,0 +1,128 @@
+import pytest
+
+import pikepdf
+
+
+@pytest.fixture
+def trivial(resources):
+ return pikepdf.open(resources / 'pal-1bit-trivial.pdf')
+
+
+@pytest.fixture
+def graph_encrypted(resources):
+ return pikepdf.open(resources / 'graph-encrypted.pdf', password='owner')
+
+
+@pytest.mark.parametrize(
+ "R,owner,user",
+ [
+ (6, "foo", "bar"),
+ (4, "password", "password"),
+ (3, "12345678", "secret"),
+ (2, "qwerty", "123456"),
+ ],
+)
+def test_encrypt_basic(trivial, outpdf, R, owner, user):
+ trivial.save(outpdf, encryption=dict(R=R, owner=owner, user=user))
+ pdf_owner = pikepdf.open(outpdf, password=owner)
+ assert pdf_owner.is_encrypted
+ pdf_user = pikepdf.open(outpdf, password=user)
+ assert pdf_user.is_encrypted
+
+
+def test_encrypt_R5(trivial, outpdf):
+ with pytest.warns(UserWarning):
+ trivial.save(outpdf, encryption=dict(R=5, owner='foo', user='foo'))
+
+
+@pytest.mark.parametrize("R", [-1, 0, 1, 7, 9, 42])
+def test_encrypt_invalid_level_value(trivial, outpdf, R):
+ with pytest.raises(ValueError):
+ trivial.save(outpdf, encryption=dict(R=R, owner='foo', user='foo'))
+
+
+@pytest.mark.parametrize("R", [3.14, '6', b'6', None])
+def test_encrypt_invalid_level(trivial, outpdf, R):
+ with pytest.raises(TypeError):
+ trivial.save(outpdf, encryption=dict(R=R, owner='foo', user='foo'))
+
+
+def test_encrypt_without_owner(trivial, outpdf):
+ trivial.save(outpdf, encryption=dict(user='foo'))
+
+
+def test_encrypt_no_passwords(trivial, outpdf):
+ trivial.save(outpdf, encryption=dict(R=6))
+
+
+def test_encrypt_permissions_deny(trivial, outpdf):
+ perms = pikepdf.models.Permissions(extract=False)
+ trivial.save(
+ outpdf, encryption=pikepdf.Encryption(owner='sun', user='moon', allow=perms)
+ )
+ pdf = pikepdf.open(outpdf, password='sun')
+ assert not pdf.allow.extract
+ assert pdf.allow.modify_form
+
+
+def test_encrypt_info(trivial, outpdf):
+ trivial.save(outpdf, encryption=dict(R=4, owner='foo', user='bar'))
+ pdf = pikepdf.open(outpdf, password='foo')
+ assert pdf.encryption.user_password == b'bar'
+ assert pdf.encryption.bits == 128
+
+
+@pytest.mark.parametrize(
+ "R,owner,user,aes,metadata,err",
+ [
+ (6, "foo", "bar", 42, False, r"aes.*bool"),
+ (6, "password", "password", True, 42, r"metadata.*bool"),
+ (3, "12345678", "secret", False, True, r"metadata.*R < 4"),
+ (2, "qwerty", "123456", True, False, r"AES.*R < 4"),
+ (6, "rc4", "rc4", False, True, r"R = 6.*AES"),
+ (4, "met", "met", False, True, r"unless AES"),
+ ],
+)
+def test_bad_settings(trivial, outpdf, R, owner, user, aes, metadata, err):
+ with pytest.raises(Exception, match=err):
+ trivial.save(
+ outpdf,
+ encryption=pikepdf.Encryption(
+ R=R, owner=owner, user=user, aes=aes, metadata=metadata
+ ),
+ )
+
+
+def test_block_encryption_and_normalize(trivial, outpdf):
+ with pytest.raises(ValueError, match=r'encryption and normalize_content'):
+ trivial.save(
+ outpdf,
+ encryption=pikepdf.Encryption(owner='foo', user='bar'),
+ normalize_content=True,
+ )
+
+
+def test_consistency_saving_removes_encryption(graph_encrypted, outpdf):
+ # This was not intended behavior. It's a side effect of unconditionally calling
+ # w.setDecodeLevel(), which disables preserving encryption in
+ # QPDFWriter::doWriteSetup()
+ graph_encrypted.save(outpdf)
+ with pikepdf.open(outpdf) as pdf:
+ assert not pdf.is_encrypted
+
+
+def test_save_without_encryption(graph_encrypted, outpdf):
+ graph_encrypted.save(outpdf, encryption=False)
+ with pikepdf.open(outpdf) as pdf:
+ assert not pdf.is_encrypted
+
+
+def test_save_preserve_encryption(graph_encrypted, outpdf):
+ graph_encrypted.save(outpdf, encryption=True)
+ with pikepdf.open(outpdf, 'owner') as pdf:
+ assert pdf.is_encrypted
+
+
+def test_preserve_encryption_not_encrypted(trivial, outpdf):
+ with pytest.raises(ValueError):
+ trivial.save(outpdf, encryption=True)
diff --git a/tests/test_formxobject.py b/tests/test_formxobject.py
index f402d76..7e252c9 100644
--- a/tests/test_formxobject.py
+++ b/tests/test_formxobject.py
@@ -1,5 +1,6 @@
import pytest
-from pikepdf import Pdf, Object, Stream, Name, Dictionary
+
+from pikepdf import Dictionary, Name, Object, Pdf, Stream
# pylint: disable=e1137
@@ -8,7 +9,8 @@ def test_create_form_xobjects(outdir):
pdf = Pdf.new()
font = pdf.make_indirect(
- Object.parse(b"""
+ Object.parse(
+ b"""
<<
/Type /Font
/Subtype /Type1
@@ -16,14 +18,16 @@ def test_create_form_xobjects(outdir):
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
- """)
+ """
+ )
)
width, height = 100, 100
image_data = b"\xff\x7f\x00" * (width * height)
image = Stream(pdf, image_data)
- image.stream_dict = Object.parse("""
+ image.stream_dict = Object.parse(
+ """
<<
/Type /XObject
/Subtype /Image
@@ -32,15 +36,17 @@ def test_create_form_xobjects(outdir):
/Width 100
/Height 100
>>
- """)
+ """
+ )
xobj_image = Dictionary({'/Im1': image})
- form_xobj_res = Dictionary({
- '/XObject': xobj_image
- })
- form_xobj = Stream(pdf, b"""
+ form_xobj_res = Dictionary({'/XObject': xobj_image})
+ form_xobj = Stream(
+ pdf,
+ b"""
/Im1 Do
- """)
+ """,
+ )
form_xobj['/Type'] = Name('/XObject')
form_xobj['/Subtype'] = Name('/Form')
form_xobj['/FormType'] = 1
@@ -50,10 +56,7 @@ def test_create_form_xobjects(outdir):
rfont = {'/F1': font}
- resources = {
- '/Font': rfont,
- '/XObject': {'/Form1': form_xobj},
- }
+ resources = {'/Font': rfont, '/XObject': {'/Form1': form_xobj}}
mediabox = [0, 0, 612, 792]
@@ -65,12 +68,14 @@ def test_create_form_xobjects(outdir):
contents = Stream(pdf, stream)
- page = pdf.make_indirect({
- '/Type': Name('/Page'),
- '/MediaBox': mediabox,
- '/Contents': contents,
- '/Resources': resources
- })
+ page = pdf.make_indirect(
+ {
+ '/Type': Name('/Page'),
+ '/MediaBox': mediabox,
+ '/Contents': contents,
+ '/Resources': resources,
+ }
+ )
pdf.pages.append(page)
pdf.save(outdir / 'formxobj.pdf')
diff --git a/tests/test_image_access.py b/tests/test_image_access.py
index 05fa010..113a5ef 100644
--- a/tests/test_image_access.py
+++ b/tests/test_image_access.py
@@ -1,16 +1,28 @@
-import pytest
-import imghdr
-from io import BytesIO
-from PIL import Image, features as PIL_features
import zlib
+from io import BytesIO
+from pathlib import Path
-# pylint: disable=w0621
-
+import pytest
+from PIL import Image
+from PIL import features as PIL_features
from pikepdf import (
- Pdf, PdfImage, PdfError, Name,
- parse_content_stream, PdfInlineImage, Stream, StreamDecodeLevel
+ Array,
+ Dictionary,
+ Name,
+ Pdf,
+ PdfError,
+ PdfImage,
+ PdfInlineImage,
+ Stream,
+ StreamDecodeLevel,
+ parse_content_stream,
)
+from pikepdf._cpphelpers import fspath
+from pikepdf.models.image import UnsupportedImageTypeError
+
+
+# pylint: disable=redefined-outer-name
def first_image_in(filename):
@@ -55,10 +67,7 @@ def test_image_replace(congress, outdir):
grayscale = pillowimage.convert('L')
grayscale = grayscale.resize((4, 4)) # So it is not obnoxious on error
- congress[0].write(
- zlib.compress(grayscale.tobytes()),
- filter=Name("/FlateDecode")
- )
+ congress[0].write(zlib.compress(grayscale.tobytes()), filter=Name("/FlateDecode"))
congress[0].ColorSpace = Name("/DeviceGray")
pdf = congress[1]
pdf.save(outdir / 'congress_gray.pdf')
@@ -69,7 +78,8 @@ def test_lowlevel_jpeg(congress):
with pytest.raises(PdfError):
congress[0].read_bytes()
- assert imghdr.what('', h=raw_bytes) == 'jpeg'
+ im = Image.open(BytesIO(raw_bytes))
+ assert im.format == 'JPEG'
pim = PdfImage(congress[0])
b = BytesIO()
@@ -89,8 +99,7 @@ def test_lowlevel_replace_jpeg(congress, outdir):
grayscale = grayscale.resize((4, 4)) # So it is not obnoxious on error
congress[0].write(
- zlib.compress(grayscale.tobytes()[:10]),
- filter=Name("/FlateDecode")
+ zlib.compress(grayscale.tobytes()[:10]), filter=Name("/FlateDecode")
)
congress[0].ColorSpace = Name('/DeviceGray')
@@ -121,11 +130,14 @@ def test_bits_per_component_missing(congress):
assert PdfImage(congress[0]).bits_per_component == 8
-@pytest.mark.parametrize('w,h,pixeldata,cs,bpc', [
- (1, 1, b'\xff', '/DeviceGray', 1),
- (1, 1, b'\xf0', '/DeviceGray', 8),
- (1, 1, b'\xff\x00\xff', '/DeviceRGB', 8)
-])
+@pytest.mark.parametrize(
+ 'w,h,pixeldata,cs,bpc',
+ [
+ (1, 1, b'\xff', '/DeviceGray', 1),
+ (1, 1, b'\xf0', '/DeviceGray', 8),
+ (1, 1, b'\xff\x00\xff', '/DeviceRGB', 8),
+ ],
+)
def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc):
pdf = Pdf.new()
@@ -149,16 +161,15 @@ def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc):
'/Type': Name('/Page'),
'/MediaBox': mediabox,
'/Contents': contents,
- '/Resources': resources
+ '/Resources': resources,
}
page = pdf.make_indirect(page_dict)
pdf.pages.append(page)
- outfile = outdir / 'test{w}{h}{cs}{bpc}.pdf'.format(
- w=w, h=h, cs=cs[1:], bpc=bpc
+ outfile = outdir / 'test{w}{h}{cs}{bpc}.pdf'.format(w=w, h=h, cs=cs[1:], bpc=bpc)
+ pdf.save(
+ outfile, compress_streams=False, stream_decode_level=StreamDecodeLevel.none
)
- pdf.save(outfile, compress_streams=False,
- stream_decode_level=StreamDecodeLevel.none)
p2 = pdf.open(outfile)
pim = PdfImage(p2.pages[0].Resources.XObject['/Im1'])
@@ -185,16 +196,17 @@ def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc):
assert pim.mode == im.mode
-@pytest.mark.parametrize('filename,bpc,filters,ext,mode,format',
+@pytest.mark.parametrize(
+ 'filename,bpc,filters,ext,mode,format_',
[
('sandwich.pdf', 1, ['/CCITTFaxDecode'], '.tif', '1', 'TIFF'),
('congress-gray.pdf', 8, ['/DCTDecode'], '.jpg', 'L', 'JPEG'),
('congress.pdf', 8, ['/DCTDecode'], '.jpg', 'RGB', 'JPEG'),
- ('cmyk-jpeg.pdf', 8, ['/DCTDecode'], '.jpg', 'CMYK', 'JPEG')
- ]
+ ('cmyk-jpeg.pdf', 8, ['/DCTDecode'], '.jpg', 'CMYK', 'JPEG'),
+ ],
)
-def test_direct_extract(resources, filename, bpc, filters, ext, mode, format):
- xobj, pdf = first_image_in(resources / filename)
+def test_direct_extract(resources, filename, bpc, filters, ext, mode, format_):
+ xobj, _pdf = first_image_in(resources / filename)
pim = PdfImage(xobj)
assert pim.bits_per_component == bpc
@@ -207,14 +219,19 @@ def test_direct_extract(resources, filename, bpc, filters, ext, mode, format):
im = Image.open(outstream)
assert im.mode == mode
- assert im.format == format
+ assert im.format == format_
-@pytest.mark.parametrize('filename,bpc', [
- ('pal.pdf', 8),
- ('pal-1bit-trivial.pdf', 1),
- pytest.param('pal-1bit-rgb.pdf', 1, marks=pytest.mark.xfail(raises=NotImplementedError)),
-])
+@pytest.mark.parametrize(
+ 'filename,bpc',
+ [
+ ('pal.pdf', 8),
+ ('pal-1bit-trivial.pdf', 1),
+ pytest.param(
+ 'pal-1bit-rgb.pdf', 1, marks=pytest.mark.xfail(raises=NotImplementedError)
+ ),
+ ],
+)
def test_image_palette(resources, filename, bpc):
pdf = Pdf.open(resources / filename)
pim = PdfImage(next(iter(pdf.pages[0].images.values())))
@@ -234,8 +251,9 @@ def test_bool_in_inline_image():
assert piim.image_mask
-@pytest.mark.skipif(not PIL_features.check_codec('jpg_2000'),
- reason='no JPEG2000 codec')
+@pytest.mark.skipif(
+ not PIL_features.check_codec('jpg_2000'), reason='no JPEG2000 codec'
+)
def test_jp2(resources):
pdf = Pdf.open(resources / 'pike-jp2.pdf')
xobj = next(iter(pdf.pages[0].images.values()))
@@ -258,3 +276,52 @@ def test_jp2(resources):
pim = PdfImage(xobj)
assert pim.colorspace == '/DeviceRGB'
assert pim.bits_per_component == 8
+
+
+def test_extract_filepath(congress, outdir):
+ xobj, _pdf = congress
+ pim = PdfImage(xobj)
+
+ # fspath is for Python 3.5
+ result = pim.extract_to(fileprefix=fspath(outdir / 'image'))
+ assert Path(result).exists()
+ assert (outdir / 'image.jpg').exists()
+
+
+def test_extract_direct_fails_nondefault_colortransform(congress):
+ xobj, _pdf = congress
+
+ xobj.DecodeParms = Dictionary(
+ ColorTransform=42 # Non standard (or allowed in the spec)
+ )
+ pim = PdfImage(xobj)
+
+ bio = BytesIO()
+ with pytest.raises(UnsupportedImageTypeError):
+ pim._extract_direct(stream=bio)
+
+ xobj.ColorSpace = Name.DeviceCMYK
+ pim = PdfImage(xobj)
+ with pytest.raises(UnsupportedImageTypeError):
+ pim._extract_direct(stream=bio)
+
+
+def test_icc_use(resources):
+ xobj, _pdf = first_image_in(resources / '1biticc.pdf')
+
+ pim = PdfImage(xobj)
+ assert pim.mode == '1'
+ assert pim.colorspace == '/ICCBased'
+ assert pim.bits_per_component == 1
+
+ assert pim.icc.profile.xcolor_space == 'GRAY'
+
+
+def test_stacked_compression(resources):
+ xobj, _pdf = first_image_in(resources / 'pike-flate-jp2.pdf')
+
+ pim = PdfImage(xobj)
+ assert pim.mode == 'RGB'
+ assert pim.colorspace == '/DeviceRGB'
+ assert pim.bits_per_component == 8
+ assert pim.filters == ['/FlateDecode', '/JPXDecode']
diff --git a/tests/test_io.py b/tests/test_io.py
new file mode 100644
index 0000000..4ce8eb5
--- /dev/null
+++ b/tests/test_io.py
@@ -0,0 +1,26 @@
+import pytest
+
+from pikepdf import Pdf
+from io import BytesIO
+
+
+@pytest.fixture
+def sandwich(resources):
+ # Has XMP, docinfo, <?adobe-xap-filters esc="CRLF"?>, shorthand attribute XMP
+ return Pdf.open(resources / 'sandwich.pdf')
+
+
+class LimitedBytesIO(BytesIO):
+ """Version of BytesIO that only accepts small reads/writes"""
+
+ def write(self, b):
+ amt = min(len(b), 100)
+ return super().write(b[:amt])
+
+
+def test_weird_output_stream(sandwich):
+ bio = BytesIO()
+ lbio = LimitedBytesIO()
+ sandwich.save(bio, static_id=True)
+ sandwich.save(lbio, static_id=True)
+ assert bio.getvalue() == lbio.getvalue()
diff --git a/tests/test_ipython.py b/tests/test_ipython.py
index 4f616c8..36e2e4b 100644
--- a/tests/test_ipython.py
+++ b/tests/test_ipython.py
@@ -2,9 +2,10 @@
Test IPython/Jupyter display hooks
"""
-import pikepdf
import pytest
+import pikepdf
+
@pytest.fixture
def graph(resources):
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 41a879c..3de8ccf 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -1,28 +1,36 @@
-from pathlib import Path
-from datetime import datetime, timezone, timedelta
import re
+from datetime import datetime, timedelta, timezone
+import os
+from pathlib import Path
+import xml.etree.ElementTree as ET
import pytest
from hypothesis import given, example
from hypothesis.strategies import integers
+
import pikepdf
-from pikepdf import Pdf, Dictionary, Name, PasswordError, Stream
+from pikepdf import Dictionary, Name, PasswordError, Pdf, Stream
from pikepdf.models.metadata import (
- decode_pdf_date, encode_pdf_date,
- XMP_NS_DC, XMP_NS_PDF, XMP_NS_XMP,
- DateConverter
+ XMP_NS_DC,
+ XMP_NS_PDF,
+ XMP_NS_XMP,
+ DateConverter,
+ decode_pdf_date,
+ encode_pdf_date,
)
-import defusedxml.ElementTree as ET
-
try:
- from libxmp import XMPMeta
-except ImportError:
- XMPMeta = None
+ from libxmp import XMPMeta, XMPError
+except Exception:
+ XMPMeta, XMPError = None, None
+
+needs_libxmp = pytest.mark.skipif(
+ os.name == 'nt' or not XMPMeta, reason="test requires libxmp"
+)
pytestmark = pytest.mark.filterwarnings('ignore:.*XMLParser.*:DeprecationWarning')
-# pylint: disable=w0621
+# pylint: disable=redefined-outer-name,pointless-statement
@pytest.fixture
@@ -58,7 +66,10 @@ def invalid_creationdate(resources):
def test_lowlevel(sandwich):
meta = sandwich.open_metadata()
assert meta._qname('pdf:Producer') == '{http://ns.adobe.com/pdf/1.3/}Producer'
- assert meta._prefix_from_uri('{http://ns.adobe.com/pdf/1.3/}Producer') == 'pdf:Producer'
+ assert (
+ meta._prefix_from_uri('{http://ns.adobe.com/pdf/1.3/}Producer')
+ == 'pdf:Producer'
+ )
assert 'pdf:Producer' in meta
assert '{http://ns.adobe.com/pdf/1.3/}Producer' in meta
assert 'xmp:CreateDate' in meta
@@ -120,16 +131,14 @@ def test_add_new_xmp_and_mark(trivial):
) as xmp_view:
assert not xmp_view
- with trivial.open_metadata(update_docinfo=False
- ) as xmp:
+ with trivial.open_metadata(update_docinfo=False) as xmp:
assert not xmp # No changes at this point
del xmp
print(trivial.Root.Metadata.read_bytes())
- with trivial.open_metadata(update_docinfo=False
- ) as xmp:
- assert 'pikepdf' in xmp['pdf:Producer']
+ with trivial.open_metadata(update_docinfo=False) as xmp:
+ assert xmp['pdf:Producer'] == 'pikepdf ' + pikepdf.__version__
assert 'xmp:MetadataDate' in xmp
@@ -147,7 +156,9 @@ def test_update_docinfo(vera):
assert Name.Author not in vera.docinfo
-@pytest.mark.parametrize('filename', list((Path(__file__).parent / 'resources').glob('*.pdf')))
+@pytest.mark.parametrize(
+ 'filename', list((Path(__file__).parent / 'resources').glob('*.pdf'))
+)
def test_roundtrip(filename):
try:
pdf = Pdf.open(filename)
@@ -175,6 +186,7 @@ def test_build_metadata(trivial, graph, outdir):
assert xmp_date == docinfo_date.isoformat()
+@needs_libxmp
def test_python_xmp_validate_add(trivial):
with trivial.open_metadata() as xmp:
xmp['dc:creator'] = ['Bob', 'Doug']
@@ -185,9 +197,6 @@ def test_python_xmp_validate_add(trivial):
assert '<rdf:Seq><rdf:li>Bob</rdf:li><rdf:li>Doug</rdf:li>' in xmp_str
assert '<rdf:Bag><rdf:li>Mackenzie</rdf:li>' in xmp_str
- if not XMPMeta:
- pytest.skip(msg='needs libxmp')
-
xmpmeta = XMPMeta(xmp_str=str(xmp))
DC = XMP_NS_DC
assert xmpmeta.does_array_item_exist(DC, 'creator', 'Bob')
@@ -196,6 +205,7 @@ def test_python_xmp_validate_add(trivial):
assert xmpmeta.does_array_item_exist(DC, 'publisher', 'Mackenzie')
+@needs_libxmp
def test_python_xmp_validate_change_list(graph):
with graph.open_metadata() as xmp:
assert 'dc:creator' in xmp
@@ -209,14 +219,13 @@ def test_python_xmp_validate_change_list(graph):
assert xmpmeta.does_array_item_exist(DC, 'creator', 'Kreacher')
+@needs_libxmp
def test_python_xmp_validate_change(sandwich):
with sandwich.open_metadata() as xmp:
assert 'xmp:CreatorTool' in xmp
xmp['xmp:CreatorTool'] = 'Creator' # Exists as a xml tag text
xmp['pdf:Producer'] = 'Producer' # Exists as a tag node
assert str(xmp)
- if not XMPMeta:
- pytest.skip(msg='needs libxmp')
xmpmeta = XMPMeta(xmp_str=str(xmp))
assert xmpmeta.does_property_exist(XMP_NS_XMP, 'CreatorTool')
assert xmpmeta.does_property_exist(XMP_NS_PDF, 'Producer')
@@ -228,7 +237,10 @@ def test_decode_pdf_date():
("20180101010101Z00'00'", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)),
("20180101010101Z", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)),
("20180101010101+0000", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)),
- ("20180101010101+0100", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone(timedelta(hours=1)))),
+ (
+ "20180101010101+0100",
+ datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone(timedelta(hours=1))),
+ ),
]
for s, d in VALS:
assert decode_pdf_date(s) == d
@@ -291,8 +303,10 @@ def test_xpacket_generation(sandwich):
xmpstr2 = sandwich.Root.Metadata.read_bytes()
assert xmpstr2.startswith(xpacket_begin)
+
def only_one_substring(s, subs):
return s.find(subs) == s.rfind(subs)
+
assert only_one_substring(xmpstr2, xpacket_begin)
assert only_one_substring(xmpstr2, xpacket_end)
@@ -318,7 +332,9 @@ def test_remove_attribute_metadata(sandwich):
def test_no_x_xmpmeta(trivial):
- trivial.Root.Metadata = Stream(trivial, b"""
+ trivial.Root.Metadata = Stream(
+ trivial,
+ b"""
<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xmp="http://ns.adobe.com/xap/1.0/">
@@ -334,9 +350,71 @@ def test_no_x_xmpmeta(trivial):
</rdf:Description>
</rdf:RDF>
<?xpacket end="w"?>
- """.strip())
+ """.strip(),
+ )
with trivial.open_metadata() as xmp:
assert xmp._get_rdf_root() is not None
xmp['pdfaid:part'] = '2'
assert xmp['pdfaid:part'] == '2'
+
+
+def test_empty_xmpmeta(trivial):
+ trivial.Root.Metadata = Stream(
+ trivial,
+ b"""<?xpacket begin="" id=""?>
+ <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="">
+ </x:xmpmeta>
+ <?xpacket end=""?>
+ """,
+ )
+ with trivial.open_metadata() as xmp:
+ pass
+
+
+@needs_libxmp
+def test_pdf_version_update(graph, outdir):
+ def get_xmp_version(filename):
+ meta = pikepdf.open(filename).open_metadata()
+ xmp = XMPMeta(xmp_str=str(meta))
+ try:
+ return xmp.get_property('http://ns.adobe.com/pdf/1.3/', 'PDFVersion')
+ except XMPError:
+ return ''
+
+ # We don't update PDFVersion unless it is present, even if we change the PDF version
+ graph.save(
+ outdir / 'empty_xmp_pdfversion.pdf',
+ force_version='1.7',
+ fix_metadata_version=True,
+ )
+ assert get_xmp_version(outdir / 'empty_xmp_pdfversion.pdf') == ''
+
+ # Add PDFVersion field for remaining tests
+ with graph.open_metadata() as m:
+ m['pdf:PDFVersion'] = graph.pdf_version
+
+ # Confirm we don't update the field when the flag is false
+ graph.save(
+ outdir / 'inconsistent_version.pdf',
+ force_version='1.6',
+ fix_metadata_version=False,
+ )
+ assert get_xmp_version(outdir / 'inconsistent_version.pdf') == '1.3'
+
+ # Confirm we update if present
+ graph.save(outdir / 'consistent_version.pdf', force_version='1.5')
+ assert get_xmp_version(outdir / 'consistent_version.pdf') == '1.5'
+
+
+def test_extension_level(trivial, outpdf):
+ trivial.save(outpdf, min_version=('1.6', 314159))
+ pdf = pikepdf.open(outpdf)
+ assert pdf.pdf_version >= '1.6' and pdf.extension_level == 314159
+
+ trivial.save(outpdf, force_version=('1.7', 42))
+ pdf = pikepdf.open(outpdf)
+ assert pdf.pdf_version == '1.7' and pdf.extension_level == 42
+
+ with pytest.raises(TypeError):
+ trivial.save(outpdf, force_version=('1.7', 'invalid extension level'))
diff --git a/tests/test_object.py b/tests/test_object.py
index 5e4b008..8fc5db0 100644
--- a/tests/test_object.py
+++ b/tests/test_object.py
@@ -1,16 +1,34 @@
+import json
+import sys
from decimal import Decimal, InvalidOperation
from math import isclose, isfinite
-import sys
+from zlib import compress
-import pikepdf
-from pikepdf import _qpdf as qpdf
-from pikepdf import (Object, String, Array, Name,
- Dictionary, Operator, PdfError)
-from hypothesis import given, example, assume
-from hypothesis.strategies import (integers, binary, lists, floats,
- characters, recursive, booleans)
import pytest
+from hypothesis import assume, example, given
+from hypothesis.strategies import (
+ binary,
+ booleans,
+ characters,
+ floats,
+ integers,
+ lists,
+ recursive,
+)
+import pikepdf
+from pikepdf import (
+ Array,
+ Dictionary,
+ Name,
+ Object,
+ Operator,
+ PdfError,
+ String,
+ Stream,
+ Pdf,
+)
+from pikepdf import _qpdf as qpdf
# pylint: disable=eval-used,unnecessary-lambda
@@ -27,15 +45,16 @@ def test_booleans():
assert encode(False) == False
-@given(characters(min_codepoint=0x20, max_codepoint=0x7f))
+@given(characters(min_codepoint=0x20, max_codepoint=0x7F))
@example('')
def test_ascii_involution(ascii_):
b = ascii_.encode('ascii')
assert encode(b) == b
-@given(characters(min_codepoint=0x0, max_codepoint=0xfef0,
- blacklist_categories=('Cs',)))
+@given(
+ characters(min_codepoint=0x0, max_codepoint=0xFEF0, blacklist_categories=('Cs',))
+)
@example('')
def test_unicode_involution(s):
assert str(encode(s)) == s
@@ -47,18 +66,20 @@ def test_binary_involution(binary_):
int64s = integers(min_value=-9223372036854775807, max_value=9223372036854775807)
+
+
@given(int64s, int64s)
def test_integer_comparison(a, b):
- equals = (a == b)
- encoded_equals = (encode(a) == encode(b))
+ equals = a == b
+ encoded_equals = encode(a) == encode(b)
assert encoded_equals == equals
- lessthan = (a < b)
- encoded_lessthan = (encode(a) < encode(b))
+ lessthan = a < b
+ encoded_lessthan = encode(a) < encode(b)
assert lessthan == encoded_lessthan
-@given(integers(-10**12, 10**12), integers(0, 12))
+@given(integers(-10 ** 12, 10 ** 12), integers(0, 12))
def test_decimal_involution(num, radix):
strnum = str(num)
if radix > len(strnum):
@@ -85,7 +106,7 @@ def test_decimal_from_float(f):
assert isclose(py_d, d, abs_tol=1e-5), (d, f.hex())
else:
- with pytest.raises(PdfError, message=repr(f)):
+ with pytest.raises(PdfError):
Object.parse(str(d))
@@ -95,13 +116,17 @@ def test_list(array):
assert a == array
-@given(lists(lists(integers(1,10), min_size=1, max_size=5),min_size=1,max_size=5))
+@given(lists(lists(integers(1, 10), min_size=1, max_size=5), min_size=1, max_size=5))
def test_nested_list(array):
a = pikepdf.Array(array)
assert a == array
-@given(recursive(integers(1,10) | booleans(), lambda children: lists(children), max_leaves=20))
+@given(
+ recursive(
+ integers(1, 10) | booleans(), lambda children: lists(children), max_leaves=20
+ )
+)
def test_nested_list2(array):
assume(isinstance(array, list))
a = pikepdf.Array(array)
@@ -125,11 +150,11 @@ def test_stack_depth():
rlimit = sys.getrecursionlimit()
try:
sys.setrecursionlimit(100)
- with pytest.raises(RecursionError, message="recursion"):
+ with pytest.raises(RecursionError):
assert encode(a) == a
- with pytest.raises(RecursionError, message="recursion"):
+ with pytest.raises(RecursionError):
encode(a) == encode(a) # pylint: disable=expression-not-assigned
- with pytest.raises(RecursionError, message="recursion"):
+ with pytest.raises(RecursionError):
repr(a)
finally:
sys.setrecursionlimit(rlimit) # So other tests are not affected
@@ -151,6 +176,11 @@ def test_len_array():
assert len(Array([3])) == 1
+def test_wrap_array():
+ assert Name('/Foo').wrap_in_array() == Array([Name('/Foo')])
+ assert Array([42]).wrap_in_array() == Array([42])
+
+
def test_name_equality():
# Who needs transitivity? :P
# While this is less than ideal ('/Foo' != b'/Foo') it allows for slightly
@@ -174,7 +204,6 @@ def test_forbidden_name_usage():
class TestHashViolation:
-
def check(self, a, b):
assert a == b, "invalid test case"
assert hash(a) == hash(b), "hash violation"
@@ -202,22 +231,23 @@ class TestHashViolation:
def test_not_constructible():
- with pytest.raises(TypeError, message="constructor"):
+ with pytest.raises(TypeError, match="constructor"):
Object()
class TestRepr:
-
def test_repr_dict(self):
- d = Dictionary({
- '/Boolean': True,
- '/Integer': 42,
- '/Real': Decimal('42.42'),
- '/String': String('hi'),
- '/Array': Array([1, 2, 3.14]),
- '/Operator': Operator('q'),
- '/Dictionary': Dictionary({'/Color': 'Red'})
- })
+ d = Dictionary(
+ {
+ '/Boolean': True,
+ '/Integer': 42,
+ '/Real': Decimal('42.42'),
+ '/String': String('hi'),
+ '/Array': Array([1, 2, 3.14]),
+ '/Operator': Operator('q'),
+ '/Dictionary': Dictionary({'/Color': 'Red'}),
+ }
+ )
expected = """\
pikepdf.Dictionary({
"/Array": [ 1, 2, Decimal('3.140000') ],
@@ -245,7 +275,7 @@ class TestRepr:
Decimal('3.14'),
String('scalar'),
Name('/Bob'),
- Operator('Q')
+ Operator('Q'),
]
for s in scalars:
assert eval(repr(s)) == s
@@ -262,12 +292,8 @@ def test_utf16_error():
class TestDictionary:
-
def test_dictionary_contains(self):
- d = Dictionary({
- '/Monty': 'Python',
- '/Flying': 'Circus'
- })
+ d = Dictionary({'/Monty': 'Python', '/Flying': 'Circus'})
assert Name.Flying in d
assert Name('/Monty') in d
assert Name.Brian not in d
@@ -298,10 +324,12 @@ class TestDictionary:
for k in d.items():
pass
+
def test_not_convertible():
class PurePythonObj:
def __repr__(self):
return 'PurePythonObj()'
+
c = PurePythonObj()
with pytest.raises(RuntimeError):
encode(c)
@@ -311,3 +339,80 @@ def test_not_convertible():
d = pikepdf.Dictionary()
with pytest.raises(RuntimeError):
d.SomeKey = c
+
+
+def test_json():
+ d = Dictionary(
+ {
+ '/Boolean': True,
+ '/Integer': 42,
+ '/Real': Decimal('42.42'),
+ '/String': String('hi'),
+ '/Array': Array([1, 2, 3.14]),
+ '/Dictionary': Dictionary({'/Color': 'Red'}),
+ }
+ )
+ json_bytes = d.to_json(False)
+ try:
+ as_dict = json.loads(json_bytes)
+ except TypeError:
+ as_dict = json.loads(json_bytes.decode('utf-8')) # Py3.5 shim
+ assert as_dict == {
+ "/Array": [1, 2, 3.140000],
+ "/Boolean": True,
+ "/Dictionary": {"/Color": "Red"},
+ "/Integer": 42,
+ "/Real": 42.42,
+ "/String": "hi",
+ }
+
+
+@pytest.fixture
+def stream_object():
+ pdf = pikepdf.new()
+ return Stream(pdf, b'')
+
+
+@pytest.fixture
+def sandwich(resources):
+ return Pdf.open(resources / 'sandwich.pdf')
+
+
+class TestObjectWrite:
+ def test_basic(self, stream_object):
+ stream_object.write(b'abc')
+ assert stream_object.read_bytes() == b'abc'
+
+ def test_compressed_readback(self, stream_object):
+ stream_object.write(compress(b'def'), filter=Name.FlateDecode)
+ assert stream_object.read_bytes() == b'def'
+
+ def test_stacked_compression(self, stream_object):
+ double_compressed = compress(compress(b'pointless'))
+ stream_object.write(
+ double_compressed, filter=[Name.FlateDecode, Name.FlateDecode]
+ )
+ assert stream_object.read_bytes() == b'pointless'
+ assert stream_object.read_raw_bytes() == double_compressed
+
+ def test_explicit_decodeparms(self, stream_object):
+ double_compressed = compress(compress(b'pointless'))
+ stream_object.write(
+ double_compressed,
+ filter=[Name.FlateDecode, Name.FlateDecode],
+ decode_parms=[None, None],
+ )
+ assert stream_object.read_bytes() == b'pointless'
+ assert stream_object.read_raw_bytes() == double_compressed
+
+ def test_no_kwargs(self, stream_object):
+ with pytest.raises(TypeError):
+ stream_object.write(compress(b'x'), [Name.FlateDecode])
+
+ def test_ccitt(self, sandwich, stream_object):
+ ccitt = b'\x00' # Not valid data, just for testing decode_parms
+ stream_object.write(
+ ccitt,
+ filter=Name.CCITTFaxDecode,
+ decode_parms=Dictionary(K=-1, Columns=8, Length=1),
+ )
diff --git a/tests/test_pages.py b/tests/test_pages.py
index a542250..c3b2ec9 100644
--- a/tests/test_pages.py
+++ b/tests/test_pages.py
@@ -1,12 +1,16 @@
-import pytest
-from pikepdf import Pdf, Stream, PdfMatrix
-
+import gc
from contextlib import suppress
from shutil import copy
-import gc
-
from sys import getrefcount as refcount
+import pytest
+
+from pikepdf import Pdf, PdfMatrix, Stream
+
+
+# pylint: disable=redefined-outer-name,pointless-statement
+
+
@pytest.fixture
def graph(resources):
return Pdf.open(resources / 'graph.pdf')
@@ -47,12 +51,17 @@ def test_delete_last_page(graph, outdir):
def test_replace_page(graph, fourpages):
q = fourpages
q2 = graph
+ q2.pages[0].CropBox = [0, 0, 500, 500]
+
+ # Ensure the page keys are different, not subsets
+ assert q.pages[1].keys() - q2.pages[0].keys()
+ assert q2.pages[0].keys() - q.pages[1].keys()
assert len(q.pages) == 4
q.pages[1] = q2.pages[0]
assert len(q.pages) == 4
- assert q.pages[1].Resources.XObject.keys() == \
- q2.pages[0].Resources.XObject.keys()
+ assert q.pages[1].keys() == q2.pages[0].keys()
+ assert q.pages[1].Resources.XObject.keys() == q2.pages[0].Resources.XObject.keys()
def test_hard_replace_page(fourpages, graph, sandwich, outdir):
@@ -98,11 +107,11 @@ def test_evil_page_deletion(resources, outdir):
assert refcount(src) == 2
pdf.pages.append(src.pages[0])
- assert refcount(src) == 3
+ assert refcount(src) == 2
del src.pages[0]
gc.collect()
- assert refcount(src) == 3
+ assert refcount(src) == 2
with suppress(PermissionError): # Fails on Windows
(outdir / 'sandwich.pdf').unlink()
@@ -115,9 +124,6 @@ def test_evil_page_deletion(resources, outdir):
pdf.save(outdir / 'out_nopages.pdf')
del pdf
gc.collect()
- # Ideally we'd see the check_refcount(src, 2) at this point, but we don't
- # have a way to find out when a PDF can be closed if a page was copied out
- # of it to another PDF
def test_append_all(sandwich, fourpages, outdir):
@@ -154,10 +160,12 @@ def test_slice_unequal_replacement(fourpages, sandwich, outdir):
assert len(pdf.pages) == 2, "number of pages must be changed"
pdf.save(outdir / 'out.pdf')
- assert pdf.pages[0].Contents.Length == page0_content_len, \
- "page 0 should be unchanged"
- assert pdf.pages[1].Contents.Length != page1_content_len, \
- "page 1's contents should have changed"
+ assert (
+ pdf.pages[0].Contents.Length == page0_content_len
+ ), "page 0 should be unchanged"
+ assert (
+ pdf.pages[1].Contents.Length != page1_content_len
+ ), "page 1's contents should have changed"
def test_slice_with_step(fourpages, sandwich, outdir):
@@ -171,24 +179,21 @@ def test_slice_with_step(fourpages, sandwich, outdir):
pdf.pages[0::2] = pdf2.pages
pdf.save(outdir / 'out.pdf')
- assert all(page.Contents.Length == pdf2_content_len
- for page in pdf.pages[0::2])
+ assert all(page.Contents.Length == pdf2_content_len for page in pdf.pages[0::2])
def test_slice_differing_lengths(fourpages, sandwich):
pdf = fourpages
pdf2 = sandwich
- with pytest.raises(ValueError,
- message="attempt to assign"):
+ with pytest.raises(ValueError, match="attempt to assign"):
pdf.pages[0::2] = pdf2.pages[0:1]
@pytest.mark.timeout(1)
def test_self_extend(fourpages):
pdf = fourpages
- with pytest.raises(ValueError,
- message="source page list modified during iteration"):
+ with pytest.raises(ValueError, match="source page list modified during iteration"):
pdf.pages.extend(pdf.pages)
@@ -240,3 +245,26 @@ def test_negative_indexing(fourpages, graph):
fourpages.pages[-42] = graph.pages[0]
with pytest.raises(IndexError):
del fourpages.pages[-42]
+
+
+def test_concatenate(resources, outdir):
+    """Extend a new PDF with the pages of pal.pdf several times, then save (issue #22)."""
+
+    def concatenate(n):
+        print('concatenating same page', n, 'times')
+        combined = Pdf.new()
+        source_path = resources / 'pal.pdf'
+        for i in range(n):
+            print(i)
+            # Each round opens its own copy of the source file.
+            single = Pdf.open(source_path)
+            combined.pages.extend(single.pages)
+        target = outdir / '{}.pdf'.format(n)
+        combined.save(target)
+
+    concatenate(5)
+
+
+def test_emplace(fourpages):
+    """After emplace(), page 0 keeps its objgen but has page 1's keys and values."""
+    pages = fourpages.pages
+    objgen_before = pages[0].objgen
+    pages[0].emplace(pages[1])
+    # Pages are looked up again by index after the call rather than reusing handles.
+    assert objgen_before == pages[0].objgen
+    assert pages[0].keys() == pages[1].keys()
+    for key in pages[0].keys():
+        assert pages[0][key] == pages[1][key]
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index fac0ccd..f79e8f1 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -1,18 +1,16 @@
+import shutil
+from subprocess import PIPE, run
import sys
import pytest
-from pikepdf import (
- parse_content_stream, Pdf, Stream, Operator, Object,
- Dictionary
-)
-from pikepdf.models import _Page as Page
-from pikepdf._qpdf import StreamParser
-from subprocess import run, PIPE
-import shutil
+from pikepdf import Dictionary, Object, Operator, Pdf, Stream, parse_content_stream
+from pikepdf._qpdf import StreamParser
+from pikepdf.models import _Page as Page
# pylint: disable=useless-super-delegation
+
class PrintParser(StreamParser):
def __init__(self):
super().__init__()
@@ -48,15 +46,15 @@ def test_parser_exception(resources):
Object._parse_stream(stream, ExceptionParser())
-@pytest.mark.skipif(
- shutil.which('pdftotext') is None,
- reason="poppler not installed")
+@pytest.mark.skipif(shutil.which('pdftotext') is None, reason="poppler not installed")
+@pytest.mark.skipif(sys.version_info < (3, 6), reason="subprocess.run on 3.5")
def test_text_filter(resources, outdir):
input_pdf = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
# Ensure the test PDF has text we can detect
- proc = run(['pdftotext', str(input_pdf), '-'],
- check=True, stdout=PIPE, encoding='utf-8')
+ proc = run(
+ ['pdftotext', str(input_pdf), '-'], check=True, stdout=PIPE, encoding='utf-8'
+ )
assert proc.stdout.strip() != '', "Need input test file that contains text"
pdf = Pdf.open(input_pdf)
@@ -76,8 +74,12 @@ def test_text_filter(resources, outdir):
pdf.save(outdir / 'notext.pdf', True)
- proc = run(['pdftotext', str(outdir / 'notext.pdf'), '-'],
- check=True, stdout=PIPE, encoding='utf-8')
+ proc = run(
+ ['pdftotext', str(outdir / 'notext.pdf'), '-'],
+ check=True,
+ stdout=PIPE,
+ encoding='utf-8',
+ )
assert proc.stdout.strip() == '', "Expected text to be removed"
@@ -87,13 +89,16 @@ def test_invalid_stream_object():
parse_content_stream(Dictionary({"/Hi": 3}))
-@pytest.mark.parametrize("test_file,expected", [
- ("fourpages.pdf", True),
- ("graph.pdf", False),
- ("veraPDF test suite 6-2-10-t02-pass-a.pdf", True),
- ("veraPDF test suite 6-2-3-3-t01-fail-c.pdf", False),
- ('sandwich.pdf', True)
-])
+@pytest.mark.parametrize(
+ "test_file,expected",
+ [
+ ("fourpages.pdf", True),
+ ("graph.pdf", False),
+ ("veraPDF test suite 6-2-10-t02-pass-a.pdf", True),
+ ("veraPDF test suite 6-2-3-3-t01-fail-c.pdf", False),
+ ('sandwich.pdf', True),
+ ],
+)
def test_has_text(resources, test_file, expected):
pdf = Pdf.open(resources / test_file)
for p in pdf.pages:
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index 33b949b..abe93cc 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -2,22 +2,33 @@
Testing focused on pikepdf.Pdf
"""
-import pytest
-from pikepdf import Pdf, PasswordError, Stream, PdfError
-
-import sys
import os
+import shutil
+import sys
from io import StringIO
+from pathlib import Path
from unittest.mock import Mock, patch
-import shutil
+
+import pytest
+
+import pikepdf
+from pikepdf import PasswordError, Pdf, PdfError, Stream
from pikepdf._cpphelpers import fspath # For py35
+# pylint: disable=redefined-outer-name
+
+
@pytest.fixture
def trivial(resources):
return Pdf.open(resources / 'pal-1bit-trivial.pdf')
+def test_new(outdir):
+    """A PDF from pikepdf.new() can be saved without adding anything to it."""
+    target = outdir / 'new-empty.pdf'
+    fresh = pikepdf.new()
+    fresh.save(target)
+
+
def test_non_filename():
with pytest.raises(TypeError):
Pdf.open(42)
@@ -73,6 +84,15 @@ class TestPasswords:
Pdf.open(resources / 'graph-encrypted.pdf')
+class TestPermissions:
+    """Check the permission flags exposed through Pdf.allow."""
+
+    def test_some_permissions_missing(self, resources):
+        """graph-encrypted.pdf opened with 'owner' lacks two permissions."""
+        pdf = Pdf.open(resources / 'graph-encrypted.pdf', 'owner')
+        # One assertion per flag, so a failure names the permission involved
+        # instead of hiding it inside a chained comparison against False.
+        assert not pdf.allow.print_highres
+        assert not pdf.allow.modify_annotation
+
+    def test_permissions_all_true_not_encrypted(self, trivial):
+        """Every value in allow is truthy for the trivial fixture PDF."""
+        assert all(trivial.allow.values())
+
+
class TestStreams:
def test_stream(self, resources):
with (resources / 'pal-1bit-trivial.pdf').open('rb') as stream:
@@ -86,6 +106,7 @@ class TestStreams:
def test_save_stream(self, trivial, outdir):
from io import BytesIO
+
pdf = trivial
pdf.save(outdir / 'nostream.pdf', static_id=True)
@@ -123,8 +144,7 @@ def test_show_xref(trivial):
trivial.show_xref_table()
-@pytest.mark.skipif(sys.version_info < (3, 6),
- reason='missing mock.assert_called')
+@pytest.mark.skipif(sys.version_info < (3, 6), reason='missing mock.assert_called')
def test_progress(trivial, outdir):
pdf = trivial
mock = Mock()
@@ -135,10 +155,7 @@ def test_progress(trivial, outdir):
def test_unicode_filename(resources, outdir):
target1 = outdir / '测试.pdf'
target2 = outdir / '通过考试.pdf'
- shutil.copy(
- fspath(resources / 'pal-1bit-trivial.pdf'),
- fspath(target1)
- )
+ shutil.copy(fspath(resources / 'pal-1bit-trivial.pdf'), fspath(target1))
pdf = Pdf.open(target1)
pdf.save(target2)
assert target2.exists()
@@ -149,12 +166,12 @@ def test_fileno_fails(resources):
with patch('os.dup') as dup:
dup.side_effect = OSError('assume dup fails')
with pytest.raises(OSError):
- pdf = Pdf.open(resources / 'pal-1bit-trivial.pdf')
+ Pdf.open(resources / 'pal-1bit-trivial.pdf')
with patch('os.dup') as dup:
dup.return_value = -1
with pytest.raises(RuntimeError):
- pdf = Pdf.open(resources / 'pal-1bit-trivial.pdf')
+ Pdf.open(resources / 'pal-1bit-trivial.pdf')
def test_min_and_force_version(trivial, outdir):
@@ -175,3 +192,51 @@ def test_min_and_force_version(trivial, outdir):
def test_normalize_linearize(trivial, outdir):
with pytest.raises(ValueError):
trivial.save(outdir / 'no.pdf', linearize=True, normalize_content=True)
+
+
+def test_make_stream(trivial, outdir):
+    """Assign a stream from make_stream() as page 0's Contents and save."""
+    contents = trivial.make_stream(b'q Q')
+    trivial.pages[0].Contents = contents
+    target = outdir / 's.pdf'
+    trivial.save(target)
+
+
+def test_add_blank_page(trivial):
+    """Each listed square page size raises ValueError; the default call adds a page."""
+    assert len(trivial.pages) == 1
+
+    for side in (-1, 0, 2, 15000):
+        rejected_size = (side, side)
+        with pytest.raises(ValueError):
+            trivial.add_blank_page(page_size=rejected_size)
+
+    # Only this call, made with default arguments, is expected to add a page.
+    trivial.add_blank_page()
+    assert len(trivial.pages) == 2
+
+
+def test_object_stream_mode_generated(trivial, outdir):
+    """ObjectStreamMode.generate yields /ObjStm for either fix_metadata_version value."""
+    cases = (('1.pdf', True), ('2.pdf', False))
+    for filename, fix_version in cases:
+        target = outdir / filename
+        trivial.save(
+            target,
+            fix_metadata_version=fix_version,
+            object_stream_mode=pikepdf.ObjectStreamMode.generate,
+        )
+        # Look for the object-stream marker in the raw bytes of the saved file.
+        assert b'/ObjStm' in target.read_bytes()
+
+
+def test_with_block(resources):
+    """The filename read inside the with-block differs from the one read after it."""
+    name_inside = ''
+    source = resources / 'pal-1bit-trivial.pdf'
+    with pikepdf.open(source) as pdf:
+        name_inside = pdf.filename
+    # Deliberately evaluated after the with-block has exited.
+    assert pdf.filename != name_inside
+
+
+def test_with_block_abuse(resources):
+    """Reading an object fetched inside the with-block raises PdfError afterwards."""
+    source = resources / 'pal-1bit-trivial.pdf'
+    with pikepdf.open(source) as pdf:
+        image = pdf.pages[0].Resources.XObject['/Im0']
+    # The read happens only after the with-block has exited.
+    with pytest.raises(PdfError):
+        image.read_bytes()
diff --git a/tests/test_pdfa.py b/tests/test_pdfa.py
index 975b258..305b2d1 100644
--- a/tests/test_pdfa.py
+++ b/tests/test_pdfa.py
@@ -1,9 +1,11 @@
-import pytest
-from pikepdf import Pdf
import os
-from pathlib import Path
-from subprocess import run, PIPE, STDOUT
import xml.etree.ElementTree as ET
+from pathlib import Path
+from subprocess import PIPE, STDOUT, run
+
+import pytest
+
+from pikepdf import Pdf
try:
VERAPDF = Path(os.environ['HOME']) / 'verapdf' / 'verapdf'
diff --git a/tests/test_private_pdfs.py b/tests/test_private_pdfs.py
index e407fa2..25fdff9 100644
--- a/tests/test_private_pdfs.py
+++ b/tests/test_private_pdfs.py
@@ -1,13 +1,11 @@
+import gzip
+from pathlib import Path
+
import pytest
+
from pikepdf import Pdf, PdfError
-import os
-import platform
-import shutil
-from contextlib import suppress
-from shutil import copy
-import gzip
-from pathlib import Path
+# pylint: disable=redefined-outer-name
# Files with unknown copyright status can't be shared publicly
@@ -20,8 +18,8 @@ def private():
pytestmark = pytest.mark.skipif(
- not PRIVATE_RESOURCES.is_dir(),
- reason='private resources not available')
+ not PRIVATE_RESOURCES.is_dir(), reason='private resources not available'
+)
def test_pypdf2_issue_361(private):
diff --git a/tests/test_refcount.py b/tests/test_refcount.py
index a1b8912..879a7a9 100644
--- a/tests/test_refcount.py
+++ b/tests/test_refcount.py
@@ -1,14 +1,13 @@
import gc
-import sys
-import pytest
-from pikepdf import Pdf
-
-# This will break on pypy, but we're not quite targetting pypy...
from sys import getrefcount as refcount
+import pytest
+
+from pikepdf import Pdf
# Try to do some things without blowing up
+
def test_access_image(resources):
pdf = Pdf.open(resources / 'congress.pdf')
assert refcount(pdf) == 2 # refcount is always +1
@@ -66,7 +65,7 @@ def test_transfer_page(resources):
pdf2.pages.insert(2, page0)
p2p2 = pdf2.pages[2]
- assert refcount(pdf) == 4 # this, pdf, page0->pdf, pdf2's page0
+ assert refcount(pdf) == 3 # this, pdf, page0->pdf
assert refcount(p2p2) == 2
del pdf
diff --git a/tests/test_sanity.py b/tests/test_sanity.py
index df1f387..dcafb7c 100644
--- a/tests/test_sanity.py
+++ b/tests/test_sanity.py
@@ -2,19 +2,25 @@
A bunch of quick tests that confirm nothing is horribly wrong
"""
-import pytest
-
import gc
from contextlib import suppress
from shutil import copy
-import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError
+from io import BytesIO
+import threading
+import os
+import time
+import signal
+
+import pytest
import pikepdf
-from pikepdf import Pdf, Object, Name, Stream
+from pikepdf import Name, Object, Pdf, Stream
def test_minimum_qpdf_version():
from pikepdf import _qpdf
+
assert _qpdf.qpdf_version() >= '7.0.0'
@@ -39,20 +45,24 @@ def test_create_pdf(outdir):
pdf = Pdf.new()
font = pdf.make_indirect(
- Object.parse(b"""
+ Object.parse(
+ b"""
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
- >>"""))
+ >>"""
+ )
+ )
width, height = 100, 100
image_data = b"\xff\x7f\x00" * (width * height)
image = Stream(pdf, image_data)
- image.stream_dict = Object.parse(b"""
+ image.stream_dict = Object.parse(
+ b"""
<<
/Type /XObject
/Subtype /Image
@@ -60,16 +70,14 @@ def test_create_pdf(outdir):
/BitsPerComponent 8
/Width 100
/Height 100
- >>""")
+ >>"""
+ )
rfont = {'/F1': font}
xobj = {'/Im1': image}
- resources = {
- '/Font': rfont,
- '/XObject': xobj
- }
+ resources = {'/Font': rfont, '/XObject': xobj}
mediabox = [0, 0, 612, 792]
@@ -84,8 +92,8 @@ def test_create_pdf(outdir):
'/Type': Name('/Page'),
'/MediaBox': mediabox,
'/Contents': contents,
- '/Resources': resources
- }
+ '/Resources': resources,
+ }
qpdf_page_dict = page_dict
page = pdf.make_indirect(qpdf_page_dict)
@@ -125,7 +133,9 @@ def test_open_save(resources, outdir):
out = str(outdir / 'graph.pdf')
copy(str(resources / 'graph.pdf'), out)
src = Pdf.open(out)
- src.save(out)
+ with pytest.raises(ValueError):
+ src.save(out)
+ src.save(outdir / 'graph2.pdf')
def test_readme_example(resources, outdir):