diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/conftest.py | 15 | ||||
-rw-r--r-- | tests/resources/1biticc.pdf | bin | 0 -> 5661 bytes | |||
-rw-r--r-- | tests/resources/graph-encrypted.pdf | bin | 293636 -> 296661 bytes | |||
-rw-r--r-- | tests/resources/pike-flate-jp2.pdf | bin | 0 -> 18471 bytes | |||
-rw-r--r-- | tests/test_codec.py | 16 | ||||
-rw-r--r-- | tests/test_dictionary.py | 6 | ||||
-rw-r--r-- | tests/test_encrypt.py | 128 | ||||
-rw-r--r-- | tests/test_formxobject.py | 45 | ||||
-rw-r--r-- | tests/test_image_access.py | 143 | ||||
-rw-r--r-- | tests/test_io.py | 26 | ||||
-rw-r--r-- | tests/test_ipython.py | 3 | ||||
-rw-r--r-- | tests/test_metadata.py | 132 | ||||
-rw-r--r-- | tests/test_object.py | 185 | ||||
-rw-r--r-- | tests/test_pages.py | 72 | ||||
-rw-r--r-- | tests/test_parsers.py | 49 | ||||
-rw-r--r-- | tests/test_pdf.py | 91 | ||||
-rw-r--r-- | tests/test_pdfa.py | 10 | ||||
-rw-r--r-- | tests/test_private_pdfs.py | 16 | ||||
-rw-r--r-- | tests/test_refcount.py | 11 | ||||
-rw-r--r-- | tests/test_sanity.py | 40 |
20 files changed, 763 insertions, 225 deletions
diff --git a/tests/conftest.py b/tests/conftest.py index 8a67e83..8887415 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,8 @@ import os import sys -import platform - -pytest_plugins = ['helpers_namespace'] +from pathlib import Path import pytest -from pathlib import Path -from subprocess import Popen, PIPE if sys.version_info < (3, 4): @@ -24,5 +20,10 @@ def resources(): @pytest.fixture(scope="function") -def outdir(tmpdir): - return Path(str(tmpdir)) +def outdir(tmp_path): + return tmp_path + + +@pytest.fixture(scope="function") +def outpdf(tmp_path): + return tmp_path / 'out.pdf' diff --git a/tests/resources/1biticc.pdf b/tests/resources/1biticc.pdf Binary files differnew file mode 100644 index 0000000..b33b4bb --- /dev/null +++ b/tests/resources/1biticc.pdf diff --git a/tests/resources/graph-encrypted.pdf b/tests/resources/graph-encrypted.pdf Binary files differindex 6e086af..6c9741e 100644 --- a/tests/resources/graph-encrypted.pdf +++ b/tests/resources/graph-encrypted.pdf diff --git a/tests/resources/pike-flate-jp2.pdf b/tests/resources/pike-flate-jp2.pdf Binary files differnew file mode 100644 index 0000000..c074e69 --- /dev/null +++ b/tests/resources/pike-flate-jp2.pdf diff --git a/tests/test_codec.py b/tests/test_codec.py new file mode 100644 index 0000000..c101d8c --- /dev/null +++ b/tests/test_codec.py @@ -0,0 +1,16 @@ +import pytest + +import pikepdf.codec + + +def test_encode(): + assert 'abc'.encode('pdfdoc') == b'abc' + with pytest.raises(ValueError): + '你好'.encode('pdfdoc') + assert '你好 world'.encode('pdfdoc', 'replace') == b'?? world' + assert '你好 world'.encode('pdfdoc', 'ignore') == b' world' + + +def test_decode(): + assert b'A'.decode('pdfdoc') == 'A' + assert b'\xa0'.decode('pdfdoc') == '€' diff --git a/tests/test_dictionary.py b/tests/test_dictionary.py index 5341968..78a558a 100644 --- a/tests/test_dictionary.py +++ b/tests/test_dictionary.py @@ -1,6 +1,10 @@ -from pikepdf import Pdf import pytest +from pikepdf import Pdf + + +# pylint: disable=redefined-outer-name,pointless-statement,expression-not-assigned + @pytest.fixture def congress(resources): diff --git a/tests/test_encrypt.py b/tests/test_encrypt.py new file mode 100644 index 0000000..50b4e8d --- /dev/null +++ b/tests/test_encrypt.py @@ -0,0 +1,128 @@ +import pytest + +import pikepdf + + +@pytest.fixture +def trivial(resources): + return pikepdf.open(resources / 'pal-1bit-trivial.pdf') + + +@pytest.fixture +def graph_encrypted(resources): + return pikepdf.open(resources / 'graph-encrypted.pdf', password='owner') + + +@pytest.mark.parametrize( + "R,owner,user", + [ + (6, "foo", "bar"), + (4, "password", "password"), + (3, "12345678", "secret"), + (2, "qwerty", "123456"), + ], +) +def test_encrypt_basic(trivial, outpdf, R, owner, user): + trivial.save(outpdf, encryption=dict(R=R, owner=owner, user=user)) + pdf_owner = pikepdf.open(outpdf, password=owner) + assert pdf_owner.is_encrypted + pdf_user = pikepdf.open(outpdf, password=user) + assert pdf_user.is_encrypted + + +def test_encrypt_R5(trivial, outpdf): + with pytest.warns(UserWarning): + trivial.save(outpdf, encryption=dict(R=5, owner='foo', user='foo')) + + +@pytest.mark.parametrize("R", [-1, 0, 1, 7, 9, 42]) +def test_encrypt_invalid_level_value(trivial, outpdf, R): + with pytest.raises(ValueError): + trivial.save(outpdf, encryption=dict(R=R, owner='foo', user='foo')) + + +@pytest.mark.parametrize("R", [3.14, '6', b'6', None]) +def test_encrypt_invalid_level(trivial, outpdf, R): + with pytest.raises(TypeError): + trivial.save(outpdf, encryption=dict(R=R, owner='foo', user='foo')) + + +def test_encrypt_without_owner(trivial, outpdf): + trivial.save(outpdf, encryption=dict(user='foo')) + + +def test_encrypt_no_passwords(trivial, outpdf): + trivial.save(outpdf, encryption=dict(R=6)) + + +def test_encrypt_permissions_deny(trivial, outpdf): + perms = pikepdf.models.Permissions(extract=False) + trivial.save( + outpdf, encryption=pikepdf.Encryption(owner='sun', user='moon', allow=perms) + ) + pdf = pikepdf.open(outpdf, password='sun') + assert not pdf.allow.extract + assert pdf.allow.modify_form + + +def test_encrypt_info(trivial, outpdf): + trivial.save(outpdf, encryption=dict(R=4, owner='foo', user='bar')) + pdf = pikepdf.open(outpdf, password='foo') + assert pdf.encryption.user_password == b'bar' + assert pdf.encryption.bits == 128 + + +@pytest.mark.parametrize( + "R,owner,user,aes,metadata,err", + [ + (6, "foo", "bar", 42, False, r"aes.*bool"), + (6, "password", "password", True, 42, r"metadata.*bool"), + (3, "12345678", "secret", False, True, r"metadata.*R < 4"), + (2, "qwerty", "123456", True, False, r"AES.*R < 4"), + (6, "rc4", "rc4", False, True, r"R = 6.*AES"), + (4, "met", "met", False, True, r"unless AES"), + ], +) +def test_bad_settings(trivial, outpdf, R, owner, user, aes, metadata, err): + with pytest.raises(Exception, match=err): + trivial.save( + outpdf, + encryption=pikepdf.Encryption( + R=R, owner=owner, user=user, aes=aes, metadata=metadata + ), + ) + + +def test_block_encryption_and_normalize(trivial, outpdf): + with pytest.raises(ValueError, match=r'encryption and normalize_content'): + trivial.save( + outpdf, + encryption=pikepdf.Encryption(owner='foo', user='bar'), + normalize_content=True, + ) + + +def test_consistency_saving_removes_encryption(graph_encrypted, outpdf): + # This was not intended behavior. It's a side effect of unconditionally calling + # w.setDecodeLevel(), which disables preserving encryption in + # QPDFWriter::doWriteSetup() + graph_encrypted.save(outpdf) + with pikepdf.open(outpdf) as pdf: + assert not pdf.is_encrypted + + +def test_save_without_encryption(graph_encrypted, outpdf): + graph_encrypted.save(outpdf, encryption=False) + with pikepdf.open(outpdf) as pdf: + assert not pdf.is_encrypted + + +def test_save_preserve_encryption(graph_encrypted, outpdf): + graph_encrypted.save(outpdf, encryption=True) + with pikepdf.open(outpdf, 'owner') as pdf: + assert pdf.is_encrypted + + +def test_preserve_encryption_not_encrypted(trivial, outpdf): + with pytest.raises(ValueError): + trivial.save(outpdf, encryption=True) diff --git a/tests/test_formxobject.py b/tests/test_formxobject.py index f402d76..7e252c9 100644 --- a/tests/test_formxobject.py +++ b/tests/test_formxobject.py @@ -1,5 +1,6 @@ import pytest -from pikepdf import Pdf, Object, Stream, Name, Dictionary + +from pikepdf import Dictionary, Name, Object, Pdf, Stream # pylint: disable=e1137 @@ -8,7 +9,8 @@ def test_create_form_xobjects(outdir): pdf = Pdf.new() font = pdf.make_indirect( - Object.parse(b""" + Object.parse( + b""" << /Type /Font /Subtype /Type1 @@ -16,14 +18,16 @@ def test_create_form_xobjects(outdir): /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> - """) + """ + ) ) width, height = 100, 100 image_data = b"\xff\x7f\x00" * (width * height) image = Stream(pdf, image_data) - image.stream_dict = Object.parse(""" + image.stream_dict = Object.parse( + """ << /Type /XObject /Subtype /Image @@ -32,15 +36,17 @@ def test_create_form_xobjects(outdir): /Width 100 /Height 100 >> - """) + """ + ) xobj_image = Dictionary({'/Im1': image}) - form_xobj_res = Dictionary({ - '/XObject': xobj_image - }) - form_xobj = Stream(pdf, b""" + form_xobj_res = Dictionary({'/XObject': xobj_image}) + form_xobj = Stream( + pdf, + b""" /Im1 Do - """) + """, + ) form_xobj['/Type'] = Name('/XObject') form_xobj['/Subtype'] = Name('/Form') form_xobj['/FormType'] = 1 @@ -50,10 +56,7 @@ def test_create_form_xobjects(outdir): rfont = {'/F1': font} - resources = { - '/Font': rfont, - '/XObject': {'/Form1': form_xobj}, - } + resources = {'/Font': rfont, '/XObject': {'/Form1': form_xobj}} mediabox = [0, 0, 612, 792] @@ -65,12 +68,14 @@ def test_create_form_xobjects(outdir): contents = Stream(pdf, stream) - page = pdf.make_indirect({ - '/Type': Name('/Page'), - '/MediaBox': mediabox, - '/Contents': contents, - '/Resources': resources - }) + page = pdf.make_indirect( + { + '/Type': Name('/Page'), + '/MediaBox': mediabox, + '/Contents': contents, + '/Resources': resources, + } + ) pdf.pages.append(page) pdf.save(outdir / 'formxobj.pdf') diff --git a/tests/test_image_access.py b/tests/test_image_access.py index 05fa010..113a5ef 100644 --- a/tests/test_image_access.py +++ b/tests/test_image_access.py @@ -1,16 +1,28 @@ -import pytest -import imghdr -from io import BytesIO -from PIL import Image, features as PIL_features import zlib +from io import BytesIO +from pathlib import Path -# pylint: disable=w0621 - +import pytest +from PIL import Image +from PIL import features as PIL_features from pikepdf import ( - Pdf, PdfImage, PdfError, Name, - parse_content_stream, PdfInlineImage, Stream, StreamDecodeLevel + Array, + Dictionary, + Name, + Pdf, + PdfError, + PdfImage, + PdfInlineImage, + Stream, + StreamDecodeLevel, + parse_content_stream, ) +from pikepdf._cpphelpers import fspath +from pikepdf.models.image import UnsupportedImageTypeError + + +# pylint: disable=redefined-outer-name def first_image_in(filename): @@ -55,10 +67,7 @@ def test_image_replace(congress, outdir): grayscale = pillowimage.convert('L') grayscale = grayscale.resize((4, 4)) # So it is not obnoxious on error - congress[0].write( - zlib.compress(grayscale.tobytes()), - filter=Name("/FlateDecode") - ) + congress[0].write(zlib.compress(grayscale.tobytes()), filter=Name("/FlateDecode")) congress[0].ColorSpace = Name("/DeviceGray") pdf = congress[1] pdf.save(outdir / 'congress_gray.pdf') @@ -69,7 +78,8 @@ def test_lowlevel_jpeg(congress): with pytest.raises(PdfError): congress[0].read_bytes() - assert imghdr.what('', h=raw_bytes) == 'jpeg' + im = Image.open(BytesIO(raw_bytes)) + assert im.format == 'JPEG' pim = PdfImage(congress[0]) b = BytesIO() @@ -89,8 +99,7 @@ def test_lowlevel_replace_jpeg(congress, outdir): grayscale = grayscale.resize((4, 4)) # So it is not obnoxious on error congress[0].write( - zlib.compress(grayscale.tobytes()[:10]), - filter=Name("/FlateDecode") + zlib.compress(grayscale.tobytes()[:10]), filter=Name("/FlateDecode") ) congress[0].ColorSpace = Name('/DeviceGray') @@ -121,11 +130,14 @@ def test_bits_per_component_missing(congress): assert PdfImage(congress[0]).bits_per_component == 8 -@pytest.mark.parametrize('w,h,pixeldata,cs,bpc', [ - (1, 1, b'\xff', '/DeviceGray', 1), - (1, 1, b'\xf0', '/DeviceGray', 8), - (1, 1, b'\xff\x00\xff', '/DeviceRGB', 8) -]) +@pytest.mark.parametrize( + 'w,h,pixeldata,cs,bpc', + [ + (1, 1, b'\xff', '/DeviceGray', 1), + (1, 1, b'\xf0', '/DeviceGray', 8), + (1, 1, b'\xff\x00\xff', '/DeviceRGB', 8), + ], +) def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc): pdf = Pdf.new() @@ -149,16 +161,15 @@ def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc): '/Type': Name('/Page'), '/MediaBox': mediabox, '/Contents': contents, - '/Resources': resources + '/Resources': resources, } page = pdf.make_indirect(page_dict) pdf.pages.append(page) - outfile = outdir / 'test{w}{h}{cs}{bpc}.pdf'.format( - w=w, h=h, cs=cs[1:], bpc=bpc + outfile = outdir / 'test{w}{h}{cs}{bpc}.pdf'.format(w=w, h=h, cs=cs[1:], bpc=bpc) + pdf.save( + outfile, compress_streams=False, stream_decode_level=StreamDecodeLevel.none ) - pdf.save(outfile, compress_streams=False, - stream_decode_level=StreamDecodeLevel.none) p2 = pdf.open(outfile) pim = PdfImage(p2.pages[0].Resources.XObject['/Im1']) @@ -185,16 +196,17 @@ def test_image_roundtrip(outdir, w, h, pixeldata, cs, bpc): assert pim.mode == im.mode -@pytest.mark.parametrize('filename,bpc,filters,ext,mode,format', +@pytest.mark.parametrize( + 'filename,bpc,filters,ext,mode,format_', [ ('sandwich.pdf', 1, ['/CCITTFaxDecode'], '.tif', '1', 'TIFF'), ('congress-gray.pdf', 8, ['/DCTDecode'], '.jpg', 'L', 'JPEG'), ('congress.pdf', 8, ['/DCTDecode'], '.jpg', 'RGB', 'JPEG'), - ('cmyk-jpeg.pdf', 8, ['/DCTDecode'], '.jpg', 'CMYK', 'JPEG') - ] + ('cmyk-jpeg.pdf', 8, ['/DCTDecode'], '.jpg', 'CMYK', 'JPEG'), + ], ) -def test_direct_extract(resources, filename, bpc, filters, ext, mode, format): - xobj, pdf = first_image_in(resources / filename) +def test_direct_extract(resources, filename, bpc, filters, ext, mode, format_): + xobj, _pdf = first_image_in(resources / filename) pim = PdfImage(xobj) assert pim.bits_per_component == bpc @@ -207,14 +219,19 @@ def test_direct_extract(resources, filename, bpc, filters, ext, mode, format): im = Image.open(outstream) assert im.mode == mode - assert im.format == format + assert im.format == format_ -@pytest.mark.parametrize('filename,bpc', [ - ('pal.pdf', 8), - ('pal-1bit-trivial.pdf', 1), - pytest.param('pal-1bit-rgb.pdf', 1, marks=pytest.mark.xfail(raises=NotImplementedError)), -]) +@pytest.mark.parametrize( + 'filename,bpc', + [ + ('pal.pdf', 8), + ('pal-1bit-trivial.pdf', 1), + pytest.param( + 'pal-1bit-rgb.pdf', 1, marks=pytest.mark.xfail(raises=NotImplementedError) + ), + ], +) def test_image_palette(resources, filename, bpc): pdf = Pdf.open(resources / filename) pim = PdfImage(next(iter(pdf.pages[0].images.values()))) @@ -234,8 +251,9 @@ def test_bool_in_inline_image(): assert piim.image_mask -@pytest.mark.skipif(not PIL_features.check_codec('jpg_2000'), - reason='no JPEG2000 codec') +@pytest.mark.skipif( + not PIL_features.check_codec('jpg_2000'), reason='no JPEG2000 codec' +) def test_jp2(resources): pdf = Pdf.open(resources / 'pike-jp2.pdf') xobj = next(iter(pdf.pages[0].images.values())) @@ -258,3 +276,52 @@ def test_jp2(resources): pim = PdfImage(xobj) assert pim.colorspace == '/DeviceRGB' assert pim.bits_per_component == 8 + + +def test_extract_filepath(congress, outdir): + xobj, _pdf = congress + pim = PdfImage(xobj) + + # fspath is for Python 3.5 + result = pim.extract_to(fileprefix=fspath(outdir / 'image')) + assert Path(result).exists() + assert (outdir / 'image.jpg').exists() + + +def test_extract_direct_fails_nondefault_colortransform(congress): + xobj, _pdf = congress + + xobj.DecodeParms = Dictionary( + ColorTransform=42 # Non standard (or allowed in the spec) + ) + pim = PdfImage(xobj) + + bio = BytesIO() + with pytest.raises(UnsupportedImageTypeError): + pim._extract_direct(stream=bio) + + xobj.ColorSpace = Name.DeviceCMYK + pim = PdfImage(xobj) + with pytest.raises(UnsupportedImageTypeError): + pim._extract_direct(stream=bio) + + +def test_icc_use(resources): + xobj, _pdf = first_image_in(resources / '1biticc.pdf') + + pim = PdfImage(xobj) + assert pim.mode == '1' + assert pim.colorspace == '/ICCBased' + assert pim.bits_per_component == 1 + + assert pim.icc.profile.xcolor_space == 'GRAY' + + +def test_stacked_compression(resources): + xobj, _pdf = first_image_in(resources / 'pike-flate-jp2.pdf') + + pim = PdfImage(xobj) + assert pim.mode == 'RGB' + assert pim.colorspace == '/DeviceRGB' + assert pim.bits_per_component == 8 + assert pim.filters == ['/FlateDecode', '/JPXDecode'] diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 0000000..4ce8eb5 --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,26 @@ +import pytest + +from pikepdf import Pdf +from io import BytesIO + + +@pytest.fixture +def sandwich(resources): + # Has XMP, docinfo, <?adobe-xap-filters esc="CRLF"?>, shorthand attribute XMP + return Pdf.open(resources / 'sandwich.pdf') + + +class LimitedBytesIO(BytesIO): + """Version of BytesIO that only accepts small reads/writes""" + + def write(self, b): + amt = min(len(b), 100) + return super().write(b[:amt]) + + +def test_weird_output_stream(sandwich): + bio = BytesIO() + lbio = LimitedBytesIO() + sandwich.save(bio, static_id=True) + sandwich.save(lbio, static_id=True) + assert bio.getvalue() == lbio.getvalue() diff --git a/tests/test_ipython.py b/tests/test_ipython.py index 4f616c8..36e2e4b 100644 --- a/tests/test_ipython.py +++ b/tests/test_ipython.py @@ -2,9 +2,10 @@ Test IPython/Jupyter display hooks """ -import pikepdf import pytest +import pikepdf + @pytest.fixture def graph(resources): diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 41a879c..3de8ccf 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,28 +1,36 @@ -from pathlib import Path -from datetime import datetime, timezone, timedelta import re +from datetime import datetime, timedelta, timezone +import os +from pathlib import Path +import xml.etree.ElementTree as ET import pytest from hypothesis import given, example from hypothesis.strategies import integers + import pikepdf -from pikepdf import Pdf, Dictionary, Name, PasswordError, Stream +from pikepdf import Dictionary, Name, PasswordError, Pdf, Stream from pikepdf.models.metadata import ( - decode_pdf_date, encode_pdf_date, - XMP_NS_DC, XMP_NS_PDF, XMP_NS_XMP, - DateConverter + XMP_NS_DC, + XMP_NS_PDF, + XMP_NS_XMP, + DateConverter, + decode_pdf_date, + encode_pdf_date, ) -import defusedxml.ElementTree as ET - try: - from libxmp import XMPMeta -except ImportError: - XMPMeta = None + from libxmp import XMPMeta, XMPError +except Exception: + XMPMeta, XMPError = None, None + +needs_libxmp = pytest.mark.skipif( + os.name == 'nt' or not XMPMeta, reason="test requires libxmp" +) pytestmark = pytest.mark.filterwarnings('ignore:.*XMLParser.*:DeprecationWarning') -# pylint: disable=w0621 +# pylint: disable=redefined-outer-name,pointless-statement @pytest.fixture @@ -58,7 +66,10 @@ def invalid_creationdate(resources): def test_lowlevel(sandwich): meta = sandwich.open_metadata() assert meta._qname('pdf:Producer') == '{http://ns.adobe.com/pdf/1.3/}Producer' - assert meta._prefix_from_uri('{http://ns.adobe.com/pdf/1.3/}Producer') == 'pdf:Producer' + assert ( + meta._prefix_from_uri('{http://ns.adobe.com/pdf/1.3/}Producer') + == 'pdf:Producer' + ) assert 'pdf:Producer' in meta assert '{http://ns.adobe.com/pdf/1.3/}Producer' in meta assert 'xmp:CreateDate' in meta @@ -120,16 +131,14 @@ def test_add_new_xmp_and_mark(trivial): ) as xmp_view: assert not xmp_view - with trivial.open_metadata(update_docinfo=False - ) as xmp: + with trivial.open_metadata(update_docinfo=False) as xmp: assert not xmp # No changes at this point del xmp print(trivial.Root.Metadata.read_bytes()) - with trivial.open_metadata(update_docinfo=False - ) as xmp: - assert 'pikepdf' in xmp['pdf:Producer'] + with trivial.open_metadata(update_docinfo=False) as xmp: + assert xmp['pdf:Producer'] == 'pikepdf ' + pikepdf.__version__ assert 'xmp:MetadataDate' in xmp @@ -147,7 +156,9 @@ def test_update_docinfo(vera): assert Name.Author not in vera.docinfo -@pytest.mark.parametrize('filename', list((Path(__file__).parent / 'resources').glob('*.pdf'))) +@pytest.mark.parametrize( + 'filename', list((Path(__file__).parent / 'resources').glob('*.pdf')) +) def test_roundtrip(filename): try: pdf = Pdf.open(filename) @@ -175,6 +186,7 @@ def test_build_metadata(trivial, graph, outdir): assert xmp_date == docinfo_date.isoformat() +@needs_libxmp def test_python_xmp_validate_add(trivial): with trivial.open_metadata() as xmp: xmp['dc:creator'] = ['Bob', 'Doug'] @@ -185,9 +197,6 @@ def test_python_xmp_validate_add(trivial): assert '<rdf:Seq><rdf:li>Bob</rdf:li><rdf:li>Doug</rdf:li>' in xmp_str assert '<rdf:Bag><rdf:li>Mackenzie</rdf:li>' in xmp_str - if not XMPMeta: - pytest.skip(msg='needs libxmp') - xmpmeta = XMPMeta(xmp_str=str(xmp)) DC = XMP_NS_DC assert xmpmeta.does_array_item_exist(DC, 'creator', 'Bob') @@ -196,6 +205,7 @@ def test_python_xmp_validate_add(trivial): assert xmpmeta.does_array_item_exist(DC, 'publisher', 'Mackenzie') +@needs_libxmp def test_python_xmp_validate_change_list(graph): with graph.open_metadata() as xmp: assert 'dc:creator' in xmp @@ -209,14 +219,13 @@ def test_python_xmp_validate_change_list(graph): assert xmpmeta.does_array_item_exist(DC, 'creator', 'Kreacher') +@needs_libxmp def test_python_xmp_validate_change(sandwich): with sandwich.open_metadata() as xmp: assert 'xmp:CreatorTool' in xmp xmp['xmp:CreatorTool'] = 'Creator' # Exists as a xml tag text xmp['pdf:Producer'] = 'Producer' # Exists as a tag node assert str(xmp) - if not XMPMeta: - pytest.skip(msg='needs libxmp') xmpmeta = XMPMeta(xmp_str=str(xmp)) assert xmpmeta.does_property_exist(XMP_NS_XMP, 'CreatorTool') assert xmpmeta.does_property_exist(XMP_NS_PDF, 'Producer') @@ -228,7 +237,10 @@ def test_decode_pdf_date(): ("20180101010101Z00'00'", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)), ("20180101010101Z", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)), ("20180101010101+0000", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)), - ("20180101010101+0100", datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone(timedelta(hours=1)))), + ( + "20180101010101+0100", + datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone(timedelta(hours=1))), + ), ] for s, d in VALS: assert decode_pdf_date(s) == d @@ -291,8 +303,10 @@ def test_xpacket_generation(sandwich): xmpstr2 = sandwich.Root.Metadata.read_bytes() assert xmpstr2.startswith(xpacket_begin) + def only_one_substring(s, subs): return s.find(subs) == s.rfind(subs) + assert only_one_substring(xmpstr2, xpacket_begin) assert only_one_substring(xmpstr2, xpacket_end) @@ -318,7 +332,9 @@ def test_remove_attribute_metadata(sandwich): def test_no_x_xmpmeta(trivial): - trivial.Root.Metadata = Stream(trivial, b""" + trivial.Root.Metadata = Stream( + trivial, + b""" <?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:xmp="http://ns.adobe.com/xap/1.0/"> @@ -334,9 +350,71 @@ def test_no_x_xmpmeta(trivial): </rdf:Description> </rdf:RDF> <?xpacket end="w"?> - """.strip()) + """.strip(), + ) with trivial.open_metadata() as xmp: assert xmp._get_rdf_root() is not None xmp['pdfaid:part'] = '2' assert xmp['pdfaid:part'] == '2' + + +def test_empty_xmpmeta(trivial): + trivial.Root.Metadata = Stream( + trivial, + b"""<?xpacket begin="" id=""?> + <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk=""> + </x:xmpmeta> + <?xpacket end=""?> + """, + ) + with trivial.open_metadata() as xmp: + pass + + +@needs_libxmp +def test_pdf_version_update(graph, outdir): + def get_xmp_version(filename): + meta = pikepdf.open(filename).open_metadata() + xmp = XMPMeta(xmp_str=str(meta)) + try: + return xmp.get_property('http://ns.adobe.com/pdf/1.3/', 'PDFVersion') + except XMPError: + return '' + + # We don't update PDFVersion unless it is present, even if we change the PDF version + graph.save( + outdir / 'empty_xmp_pdfversion.pdf', + force_version='1.7', + fix_metadata_version=True, + ) + assert get_xmp_version(outdir / 'empty_xmp_pdfversion.pdf') == '' + + # Add PDFVersion field for remaining tests + with graph.open_metadata() as m: + m['pdf:PDFVersion'] = graph.pdf_version + + # Confirm we don't update the field when the flag is false + graph.save( + outdir / 'inconsistent_version.pdf', + force_version='1.6', + fix_metadata_version=False, + ) + assert get_xmp_version(outdir / 'inconsistent_version.pdf') == '1.3' + + # Confirm we update if present + graph.save(outdir / 'consistent_version.pdf', force_version='1.5') + assert get_xmp_version(outdir / 'consistent_version.pdf') == '1.5' + + +def test_extension_level(trivial, outpdf): + trivial.save(outpdf, min_version=('1.6', 314159)) + pdf = pikepdf.open(outpdf) + assert pdf.pdf_version >= '1.6' and pdf.extension_level == 314159 + + trivial.save(outpdf, force_version=('1.7', 42)) + pdf = pikepdf.open(outpdf) + assert pdf.pdf_version == '1.7' and pdf.extension_level == 42 + + with pytest.raises(TypeError): + trivial.save(outpdf, force_version=('1.7', 'invalid extension level')) diff --git a/tests/test_object.py b/tests/test_object.py index 5e4b008..8fc5db0 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1,16 +1,34 @@ +import json +import sys from decimal import Decimal, InvalidOperation from math import isclose, isfinite -import sys +from zlib import compress -import pikepdf -from pikepdf import _qpdf as qpdf -from pikepdf import (Object, String, Array, Name, - Dictionary, Operator, PdfError) -from hypothesis import given, example, assume -from hypothesis.strategies import (integers, binary, lists, floats, - characters, recursive, booleans) import pytest +from hypothesis import assume, example, given +from hypothesis.strategies import ( + binary, + booleans, + characters, + floats, + integers, + lists, + recursive, +) +import pikepdf +from pikepdf import ( + Array, + Dictionary, + Name, + Object, + Operator, + PdfError, + String, + Stream, + Pdf, +) +from pikepdf import _qpdf as qpdf # pylint: disable=eval-used,unnecessary-lambda @@ -27,15 +45,16 @@ def test_booleans(): assert encode(False) == False -@given(characters(min_codepoint=0x20, max_codepoint=0x7f)) +@given(characters(min_codepoint=0x20, max_codepoint=0x7F)) @example('') def test_ascii_involution(ascii_): b = ascii_.encode('ascii') assert encode(b) == b -@given(characters(min_codepoint=0x0, max_codepoint=0xfef0, - blacklist_categories=('Cs',))) +@given( + characters(min_codepoint=0x0, max_codepoint=0xFEF0, blacklist_categories=('Cs',)) +) @example('') def test_unicode_involution(s): assert str(encode(s)) == s @@ -47,18 +66,20 @@ def test_binary_involution(binary_): int64s = integers(min_value=-9223372036854775807, max_value=9223372036854775807) + + @given(int64s, int64s) def test_integer_comparison(a, b): - equals = (a == b) - encoded_equals = (encode(a) == encode(b)) + equals = a == b + encoded_equals = encode(a) == encode(b) assert encoded_equals == equals - lessthan = (a < b) - encoded_lessthan = (encode(a) < encode(b)) + lessthan = a < b + encoded_lessthan = encode(a) < encode(b) assert lessthan == encoded_lessthan -@given(integers(-10**12, 10**12), integers(0, 12)) +@given(integers(-10 ** 12, 10 ** 12), integers(0, 12)) def test_decimal_involution(num, radix): strnum = str(num) if radix > len(strnum): @@ -85,7 +106,7 @@ def test_decimal_from_float(f): assert isclose(py_d, d, abs_tol=1e-5), (d, f.hex()) else: - with pytest.raises(PdfError, message=repr(f)): + with pytest.raises(PdfError): Object.parse(str(d)) @@ -95,13 +116,17 @@ def test_list(array): assert a == array -@given(lists(lists(integers(1,10), min_size=1, max_size=5),min_size=1,max_size=5)) +@given(lists(lists(integers(1, 10), min_size=1, max_size=5), min_size=1, max_size=5)) def test_nested_list(array): a = pikepdf.Array(array) assert a == array -@given(recursive(integers(1,10) | booleans(), lambda children: lists(children), max_leaves=20)) +@given( + recursive( + integers(1, 10) | booleans(), lambda children: lists(children), max_leaves=20 + ) +) def test_nested_list2(array): assume(isinstance(array, list)) a = pikepdf.Array(array) @@ -125,11 +150,11 @@ def test_stack_depth(): rlimit = sys.getrecursionlimit() try: sys.setrecursionlimit(100) - with pytest.raises(RecursionError, message="recursion"): + with pytest.raises(RecursionError): assert encode(a) == a - with pytest.raises(RecursionError, message="recursion"): + with pytest.raises(RecursionError): encode(a) == encode(a) # pylint: disable=expression-not-assigned - with pytest.raises(RecursionError, message="recursion"): + with pytest.raises(RecursionError): repr(a) finally: sys.setrecursionlimit(rlimit) # So other tests are not affected @@ -151,6 +176,11 @@ def test_len_array(): assert len(Array([3])) == 1 +def test_wrap_array(): + assert Name('/Foo').wrap_in_array() == Array([Name('/Foo')]) + assert Array([42]).wrap_in_array() == Array([42]) + + def test_name_equality(): # Who needs transitivity? :P # While this is less than ideal ('/Foo' != b'/Foo') it allows for slightly @@ -174,7 +204,6 @@ def test_forbidden_name_usage(): class TestHashViolation: - def check(self, a, b): assert a == b, "invalid test case" assert hash(a) == hash(b), "hash violation" @@ -202,22 +231,23 @@ class TestHashViolation: def test_not_constructible(): - with pytest.raises(TypeError, message="constructor"): + with pytest.raises(TypeError, match="constructor"): Object() class TestRepr: - def test_repr_dict(self): - d = Dictionary({ - '/Boolean': True, - '/Integer': 42, - '/Real': Decimal('42.42'), - '/String': String('hi'), - '/Array': Array([1, 2, 3.14]), - '/Operator': Operator('q'), - '/Dictionary': Dictionary({'/Color': 'Red'}) - }) + d = Dictionary( + { + '/Boolean': True, + '/Integer': 42, + '/Real': Decimal('42.42'), + '/String': String('hi'), + '/Array': Array([1, 2, 3.14]), + '/Operator': Operator('q'), + '/Dictionary': Dictionary({'/Color': 'Red'}), + } + ) expected = """\ pikepdf.Dictionary({ "/Array": [ 1, 2, Decimal('3.140000') ], @@ -245,7 +275,7 @@ class TestRepr: Decimal('3.14'), String('scalar'), Name('/Bob'), - Operator('Q') + Operator('Q'), ] for s in scalars: assert eval(repr(s)) == s @@ -262,12 +292,8 @@ def test_utf16_error(): class TestDictionary: - def test_dictionary_contains(self): - d = Dictionary({ - '/Monty': 'Python', - '/Flying': 'Circus' - }) + d = Dictionary({'/Monty': 'Python', '/Flying': 'Circus'}) assert Name.Flying in d assert Name('/Monty') in d assert Name.Brian not in d @@ -298,10 +324,12 @@ class TestDictionary: for k in d.items(): pass + def test_not_convertible(): class PurePythonObj: def __repr__(self): return 'PurePythonObj()' + c = PurePythonObj() with pytest.raises(RuntimeError): encode(c) @@ -311,3 +339,80 @@ def test_not_convertible(): d = pikepdf.Dictionary() with pytest.raises(RuntimeError): d.SomeKey = c + + +def test_json(): + d = Dictionary( + { + '/Boolean': True, + '/Integer': 42, + '/Real': Decimal('42.42'), + '/String': String('hi'), + '/Array': Array([1, 2, 3.14]), + '/Dictionary': Dictionary({'/Color': 'Red'}), + } + ) + json_bytes = d.to_json(False) + try: + as_dict = json.loads(json_bytes) + except TypeError: + as_dict = json.loads(json_bytes.decode('utf-8')) # Py3.5 shim + assert as_dict == { + "/Array": [1, 2, 3.140000], + "/Boolean": True, + "/Dictionary": {"/Color": "Red"}, + "/Integer": 42, + "/Real": 42.42, + "/String": "hi", + } + + +@pytest.fixture +def stream_object(): + pdf = pikepdf.new() + return Stream(pdf, b'') + + +@pytest.fixture +def sandwich(resources): + return Pdf.open(resources / 'sandwich.pdf') + + +class TestObjectWrite: + def test_basic(self, stream_object): + stream_object.write(b'abc') + assert stream_object.read_bytes() == b'abc' + + def test_compressed_readback(self, stream_object): + stream_object.write(compress(b'def'), filter=Name.FlateDecode) + assert stream_object.read_bytes() == b'def' + + def test_stacked_compression(self, stream_object): + double_compressed = compress(compress(b'pointless')) + stream_object.write( + double_compressed, filter=[Name.FlateDecode, Name.FlateDecode] + ) + assert stream_object.read_bytes() == b'pointless' + assert stream_object.read_raw_bytes() == double_compressed + + def test_explicit_decodeparms(self, stream_object): + double_compressed = compress(compress(b'pointless')) + stream_object.write( + double_compressed, + filter=[Name.FlateDecode, Name.FlateDecode], + decode_parms=[None, None], + ) + assert stream_object.read_bytes() == b'pointless' + assert stream_object.read_raw_bytes() == double_compressed + + def test_no_kwargs(self, stream_object): + with pytest.raises(TypeError): + stream_object.write(compress(b'x'), [Name.FlateDecode]) + + def test_ccitt(self, sandwich, stream_object): + ccitt = b'\x00' # Not valid data, just for testing decode_parms + stream_object.write( + ccitt, + filter=Name.CCITTFaxDecode, + decode_parms=Dictionary(K=-1, Columns=8, Length=1), + ) diff --git a/tests/test_pages.py b/tests/test_pages.py index a542250..c3b2ec9 100644 --- a/tests/test_pages.py +++ b/tests/test_pages.py @@ -1,12 +1,16 @@ -import pytest -from pikepdf import Pdf, Stream, PdfMatrix - +import gc from contextlib import suppress from shutil import copy -import gc - from sys import getrefcount as refcount +import pytest + +from pikepdf import Pdf, PdfMatrix, Stream + + +# pylint: disable=redefined-outer-name,pointless-statement + + @pytest.fixture def graph(resources): return Pdf.open(resources / 'graph.pdf') @@ -47,12 +51,17 @@ def test_delete_last_page(graph, outdir): def test_replace_page(graph, fourpages): q = fourpages q2 = graph + q2.pages[0].CropBox = [0, 0, 500, 500] + + # Ensure the page keys are different, not subsets + assert q.pages[1].keys() - q2.pages[0].keys() + assert q2.pages[0].keys() - q.pages[1].keys() assert len(q.pages) == 4 q.pages[1] = q2.pages[0] assert len(q.pages) == 4 - assert q.pages[1].Resources.XObject.keys() == \ - q2.pages[0].Resources.XObject.keys() + assert q.pages[1].keys() == q2.pages[0].keys() + assert q.pages[1].Resources.XObject.keys() == q2.pages[0].Resources.XObject.keys() def test_hard_replace_page(fourpages, graph, sandwich, outdir): @@ -98,11 +107,11 @@ def test_evil_page_deletion(resources, outdir): assert refcount(src) == 2 pdf.pages.append(src.pages[0]) - assert refcount(src) == 3 + assert refcount(src) == 2 del src.pages[0] gc.collect() - assert refcount(src) == 3 + assert refcount(src) == 2 with suppress(PermissionError): # Fails on Windows (outdir / 'sandwich.pdf').unlink() @@ -115,9 +124,6 @@ def test_evil_page_deletion(resources, outdir): pdf.save(outdir / 'out_nopages.pdf') del pdf gc.collect() - # Ideally we'd see the check_refcount(src, 2) at this point, but we don't - # have a way to find out when a PDF can be closed if a page was copied out - # of it to another PDF def test_append_all(sandwich, fourpages, outdir): @@ -154,10 +160,12 @@ def test_slice_unequal_replacement(fourpages, sandwich, outdir): assert len(pdf.pages) == 2, "number of pages must be changed" pdf.save(outdir / 'out.pdf') - assert pdf.pages[0].Contents.Length == page0_content_len, \ - "page 0 should be unchanged" - assert pdf.pages[1].Contents.Length != page1_content_len, \ - "page 1's contents should have changed" + assert ( + pdf.pages[0].Contents.Length == page0_content_len + ), "page 0 should be unchanged" + assert ( + pdf.pages[1].Contents.Length != page1_content_len + ), "page 1's contents should have changed" def test_slice_with_step(fourpages, sandwich, outdir): @@ -171,24 +179,21 @@ def test_slice_with_step(fourpages, sandwich, outdir): pdf.pages[0::2] = pdf2.pages pdf.save(outdir / 'out.pdf') - assert all(page.Contents.Length == pdf2_content_len - for page in pdf.pages[0::2]) + assert all(page.Contents.Length == pdf2_content_len for page in pdf.pages[0::2]) def test_slice_differing_lengths(fourpages, sandwich): pdf = fourpages pdf2 = sandwich - with pytest.raises(ValueError, - message="attempt to assign"): + with pytest.raises(ValueError, match="attempt to assign"): pdf.pages[0::2] = pdf2.pages[0:1] @pytest.mark.timeout(1) def test_self_extend(fourpages): pdf = fourpages - with pytest.raises(ValueError, - message="source page list modified during iteration"): + with pytest.raises(ValueError, match="source page list modified during iteration"): pdf.pages.extend(pdf.pages) @@ -240,3 +245,26 @@ def test_negative_indexing(fourpages, graph): fourpages.pages[-42] = graph.pages[0] with pytest.raises(IndexError): del fourpages.pages[-42] + + +def test_concatenate(resources, outdir): + # Issue #22 + def concatenate(n): + print('concatenating same page', n, 'times') + output_pdf = Pdf.new() + for i in range(n): + print(i) + pdf_page = Pdf.open(resources / 'pal.pdf') + output_pdf.pages.extend(pdf_page.pages) + output_pdf.save(outdir / '{}.pdf'.format(n)) + + concatenate(5) + + +def test_emplace(fourpages): + p0_objgen = fourpages.pages[0].objgen + fourpages.pages[0].emplace(fourpages.pages[1]) + assert p0_objgen == fourpages.pages[0].objgen + assert fourpages.pages[0].keys() == fourpages.pages[1].keys() + for k in fourpages.pages[0].keys(): + assert fourpages.pages[0][k] == fourpages.pages[1][k] diff --git a/tests/test_parsers.py b/tests/test_parsers.py index fac0ccd..f79e8f1 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,18 +1,16 @@ +import shutil +from subprocess import PIPE, run import sys import pytest -from pikepdf import ( - parse_content_stream, Pdf, Stream, Operator, Object, - Dictionary -) -from pikepdf.models import _Page as Page -from pikepdf._qpdf import StreamParser -from subprocess import run, PIPE -import shutil +from pikepdf import Dictionary, Object, Operator, Pdf, Stream, parse_content_stream +from pikepdf._qpdf import StreamParser +from pikepdf.models import _Page as Page # pylint: disable=useless-super-delegation + class PrintParser(StreamParser): def __init__(self): super().__init__() @@ -48,15 +46,15 @@ def test_parser_exception(resources): Object._parse_stream(stream, ExceptionParser()) -@pytest.mark.skipif( - shutil.which('pdftotext') is None, - reason="poppler not installed") +@pytest.mark.skipif(shutil.which('pdftotext') is None, reason="poppler not installed") +@pytest.mark.skipif(sys.version_info < (3, 6), reason="subprocess.run on 3.5") def test_text_filter(resources, outdir): input_pdf = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf' # Ensure the test PDF has detect we can find - proc = run(['pdftotext', str(input_pdf), '-'], - check=True, stdout=PIPE, encoding='utf-8') + proc = run( + ['pdftotext', str(input_pdf), '-'], check=True, stdout=PIPE, encoding='utf-8' + ) assert proc.stdout.strip() != '', "Need input test file that contains text" pdf = Pdf.open(input_pdf) @@ -76,8 +74,12 @@ def test_text_filter(resources, outdir): pdf.save(outdir / 'notext.pdf', True) - proc = run(['pdftotext', str(outdir / 'notext.pdf'), '-'], - check=True, stdout=PIPE, encoding='utf-8') + proc = run( + ['pdftotext', str(outdir / 'notext.pdf'), '-'], + check=True, + stdout=PIPE, + encoding='utf-8', + ) assert proc.stdout.strip() == '', "Expected text to be removed" @@ -87,13 +89,16 @@ def test_invalid_stream_object(): parse_content_stream(Dictionary({"/Hi": 3})) -@pytest.mark.parametrize("test_file,expected", [ - ("fourpages.pdf", True), - ("graph.pdf", False), - ("veraPDF test suite 6-2-10-t02-pass-a.pdf", True), - ("veraPDF test suite 6-2-3-3-t01-fail-c.pdf", False), - ('sandwich.pdf', True) -]) +@pytest.mark.parametrize( + "test_file,expected", + [ + ("fourpages.pdf", True), + ("graph.pdf", False), + ("veraPDF test suite 6-2-10-t02-pass-a.pdf", True), + ("veraPDF test suite 6-2-3-3-t01-fail-c.pdf", False), + ('sandwich.pdf', True), + ], +) def test_has_text(resources, test_file, expected): pdf = Pdf.open(resources / test_file) for p in pdf.pages: diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 33b949b..abe93cc 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -2,22 +2,33 @@ Testing focused on pikepdf.Pdf """ -import pytest -from pikepdf import Pdf, PasswordError, Stream, PdfError - -import sys import os +import shutil +import sys from io import StringIO +from pathlib import Path from unittest.mock import Mock, patch -import shutil + +import pytest + +import pikepdf +from pikepdf import PasswordError, Pdf, PdfError, Stream from pikepdf._cpphelpers import fspath # For py35 +# pylint: disable=redefined-outer-name + + @pytest.fixture def trivial(resources): return Pdf.open(resources / 'pal-1bit-trivial.pdf') +def test_new(outdir): + pdf = pikepdf.new() + pdf.save(outdir / 'new-empty.pdf') + + def test_non_filename(): with pytest.raises(TypeError): Pdf.open(42) @@ -73,6 +84,15 @@ class TestPasswords: Pdf.open(resources / 'graph-encrypted.pdf') +class TestPermissions: + def test_some_permissions_missing(self, resources): + pdf = Pdf.open(resources / 'graph-encrypted.pdf', 'owner') + assert pdf.allow.print_highres == pdf.allow.modify_annotation == False + + def test_permissions_all_true_not_encrypted(self, trivial): + assert all(trivial.allow.values()) + + class TestStreams: def test_stream(self, resources): with (resources / 'pal-1bit-trivial.pdf').open('rb') as stream: @@ -86,6 +106,7 @@ class TestStreams: def test_save_stream(self, trivial, outdir): from io import BytesIO + pdf = trivial pdf.save(outdir / 'nostream.pdf', static_id=True) @@ -123,8 +144,7 @@ def test_show_xref(trivial): trivial.show_xref_table() -@pytest.mark.skipif(sys.version_info < (3, 6), - reason='missing mock.assert_called') +@pytest.mark.skipif(sys.version_info < (3, 6), reason='missing mock.assert_called') def test_progress(trivial, outdir): pdf = trivial mock = Mock() @@ -135,10 +155,7 @@ def test_progress(trivial, outdir): def test_unicode_filename(resources, outdir): target1 = outdir / '测试.pdf' target2 = outdir / '通过考试.pdf' - shutil.copy( - fspath(resources / 'pal-1bit-trivial.pdf'), - fspath(target1) - ) + shutil.copy(fspath(resources / 'pal-1bit-trivial.pdf'), fspath(target1)) pdf = Pdf.open(target1) pdf.save(target2) assert target2.exists() @@ -149,12 +166,12 @@ def test_fileno_fails(resources): with patch('os.dup') as dup: dup.side_effect = OSError('assume dup fails') with pytest.raises(OSError): - pdf = Pdf.open(resources / 'pal-1bit-trivial.pdf') + Pdf.open(resources / 'pal-1bit-trivial.pdf') with patch('os.dup') as dup: dup.return_value = -1 with pytest.raises(RuntimeError): - pdf = Pdf.open(resources / 'pal-1bit-trivial.pdf') + Pdf.open(resources / 'pal-1bit-trivial.pdf') def test_min_and_force_version(trivial, outdir): @@ -175,3 +192,51 @@ def test_min_and_force_version(trivial, outdir): def test_normalize_linearize(trivial, outdir): with pytest.raises(ValueError): trivial.save(outdir / 'no.pdf', linearize=True, normalize_content=True) + + +def test_make_stream(trivial, outdir): + pdf = trivial + stream = pdf.make_stream(b'q Q') + pdf.pages[0].Contents = stream + pdf.save(outdir / 's.pdf') + + +def test_add_blank_page(trivial): + assert len(trivial.pages) == 1 + + invalid = [-1, 0, 2, 15000] + for n in invalid: + with pytest.raises(ValueError): + trivial.add_blank_page(page_size=(n, n)) + trivial.add_blank_page() + assert len(trivial.pages) == 2 + + +def test_object_stream_mode_generated(trivial, outdir): + trivial.save( + outdir / '1.pdf', + fix_metadata_version=True, + object_stream_mode=pikepdf.ObjectStreamMode.generate, + ) + assert b'/ObjStm' in (outdir / '1.pdf').read_bytes() + + trivial.save( + outdir / '2.pdf', + fix_metadata_version=False, + object_stream_mode=pikepdf.ObjectStreamMode.generate, + ) + assert b'/ObjStm' in (outdir / '2.pdf').read_bytes() + + +def test_with_block(resources): + desc = '' + with pikepdf.open(resources / 'pal-1bit-trivial.pdf') as pdf: + desc = pdf.filename + assert pdf.filename != desc + + +def test_with_block_abuse(resources): + with pikepdf.open(resources / 'pal-1bit-trivial.pdf') as pdf: + im0 = pdf.pages[0].Resources.XObject['/Im0'] + with pytest.raises(PdfError): + im0.read_bytes() diff --git a/tests/test_pdfa.py b/tests/test_pdfa.py index 975b258..305b2d1 100644 --- a/tests/test_pdfa.py +++ b/tests/test_pdfa.py @@ -1,9 +1,11 @@ -import pytest -from pikepdf import Pdf import os -from pathlib import Path -from subprocess import run, PIPE, STDOUT import xml.etree.ElementTree as ET +from pathlib import Path +from subprocess import PIPE, STDOUT, run + +import pytest + +from pikepdf import Pdf try: VERAPDF = Path(os.environ['HOME']) / 'verapdf' / 'verapdf' diff --git a/tests/test_private_pdfs.py b/tests/test_private_pdfs.py index e407fa2..25fdff9 100644 --- a/tests/test_private_pdfs.py +++ b/tests/test_private_pdfs.py @@ -1,13 +1,11 @@ +import gzip +from pathlib import Path + import pytest + from pikepdf import Pdf, PdfError -import os -import platform -import shutil -from contextlib import suppress -from shutil import copy -import gzip -from pathlib import Path +# pylint: disable=redefined-outer-name # Files with unknown copyright status can't be shared publicly @@ -20,8 +18,8 @@ def private(): pytestmark = pytest.mark.skipif( - not PRIVATE_RESOURCES.is_dir(), - reason='private resources not available') + not PRIVATE_RESOURCES.is_dir(), reason='private resources not available' +) def test_pypdf2_issue_361(private): diff --git a/tests/test_refcount.py b/tests/test_refcount.py index a1b8912..879a7a9 100644 --- a/tests/test_refcount.py +++ b/tests/test_refcount.py @@ -1,14 +1,13 @@ import gc -import sys -import pytest -from pikepdf import Pdf - -# This will break on pypy, but we're not quite targetting pypy... from sys import getrefcount as refcount +import pytest + +from pikepdf import Pdf # Try to do some things without blowing up + def test_access_image(resources): pdf = Pdf.open(resources / 'congress.pdf') assert refcount(pdf) == 2 # refcount is always +1 @@ -66,7 +65,7 @@ def test_transfer_page(resources): pdf2.pages.insert(2, page0) p2p2 = pdf2.pages[2] - assert refcount(pdf) == 4 # this, pdf, page0->pdf, pdf2's page0 + assert refcount(pdf) == 3 # this, pdf, page0->pdf assert refcount(p2p2) == 2 del pdf diff --git a/tests/test_sanity.py b/tests/test_sanity.py index df1f387..dcafb7c 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -2,19 +2,25 @@ A bunch of quick tests that confirm nothing is horribly wrong """ -import pytest - import gc from contextlib import suppress from shutil import copy -import sys +from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError +from io import BytesIO +import threading +import os +import time +import signal + +import pytest import pikepdf -from pikepdf import Pdf, Object, Name, Stream +from pikepdf import Name, Object, Pdf, Stream def test_minimum_qpdf_version(): from pikepdf import _qpdf + assert _qpdf.qpdf_version() >= '7.0.0' @@ -39,20 +45,24 @@ def test_create_pdf(outdir): pdf = Pdf.new() font = pdf.make_indirect( - Object.parse(b""" + Object.parse( + b""" << /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding - >>""")) + >>""" + ) + ) width, height = 100, 100 image_data = b"\xff\x7f\x00" * (width * height) image = Stream(pdf, image_data) - image.stream_dict = Object.parse(b""" + image.stream_dict = Object.parse( + b""" << /Type /XObject /Subtype /Image @@ -60,16 +70,14 @@ def test_create_pdf(outdir): /BitsPerComponent 8 /Width 100 /Height 100 - >>""") + >>""" + ) rfont = {'/F1': font} xobj = {'/Im1': image} - resources = { - '/Font': rfont, - '/XObject': xobj - } + resources = {'/Font': rfont, '/XObject': xobj} mediabox = [0, 0, 612, 792] @@ -84,8 +92,8 @@ def test_create_pdf(outdir): '/Type': Name('/Page'), '/MediaBox': mediabox, '/Contents': contents, - '/Resources': resources - } + '/Resources': resources, + } qpdf_page_dict = page_dict page = pdf.make_indirect(qpdf_page_dict) @@ -125,7 +133,9 @@ def test_open_save(resources, outdir): out = str(outdir / 'graph.pdf') copy(str(resources / 'graph.pdf'), out) src = Pdf.open(out) - src.save(out) + with pytest.raises(ValueError): + src.save(out) + src.save(outdir / 'graph2.pdf') def test_readme_example(resources, outdir): |