summaryrefslogtreecommitdiff
path: root/tests/test_pdf.py
blob: abe93cc5bd15ebad43a414d33eb8b45b32462dff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
"""
Testing focused on pikepdf.Pdf
"""

import os
import shutil
import sys
from io import StringIO
from pathlib import Path
from unittest.mock import Mock, patch

import pytest

import pikepdf
from pikepdf import PasswordError, Pdf, PdfError, Stream
from pikepdf._cpphelpers import fspath  # For py35


# pylint: disable=redefined-outer-name


@pytest.fixture
def trivial(resources):
    """Provide the 'pal-1bit-trivial.pdf' resource opened as a Pdf."""
    trivial_path = resources / 'pal-1bit-trivial.pdf'
    return Pdf.open(trivial_path)


def test_new(outdir):
    """A PDF created with pikepdf.new() can be saved straight away."""
    destination = outdir / 'new-empty.pdf'
    pikepdf.new().save(destination)


def test_non_filename():
    """Opening something that is not a filename (an int) raises TypeError."""
    not_a_filename = 42
    with pytest.raises(TypeError):
        Pdf.open(not_a_filename)


def test_not_existing_file():
    """Opening a path that does not exist raises FileNotFoundError."""
    missing = 'does_not_exist.pdf'
    with pytest.raises(FileNotFoundError):
        Pdf.open(missing)


def test_empty(outdir):
    """A freshly touched file is rejected by Pdf.open with PdfError."""
    empty_file = outdir / 'empty.pdf'
    empty_file.touch()

    with pytest.raises(PdfError):
        Pdf.open(empty_file)


class TestLinearization:
    """Tests for saving and detecting linearized PDFs."""

    def test_linearization(self, resources, outdir):
        """Saving with linearize=True yields a file reported as linearized."""
        linearized_path = outdir / 'lin.pdf'

        original = Pdf.open(resources / 'graph.pdf')
        assert not original.is_linearized

        original.save(linearized_path, linearize=True)

        reopened = Pdf.open(linearized_path)
        assert reopened.is_linearized

        # check_linearization is handed a text stream; the test only
        # requires that the call completes.
        report = StringIO()
        reopened.check_linearization(report)


def test_objgen(resources):
    """An object fetched by its (objid, gen) pair matches the original."""
    source_pdf = Pdf.open(resources / 'graph.pdf')
    image = source_pdf.pages[0].Resources.XObject['/Im0']
    assert image.objgen == (5, 0)

    fetched = source_pdf.get_object((5, 0))
    assert fetched.is_owned_by(source_pdf)
    assert fetched == image


class TestPasswords:
    """Opening 'graph-encrypted.pdf' with bad or missing passwords.

    The correct passwords for that file are "owner" and "user".
    """

    def test_open_pdf_wrong_password(self, resources):
        """A wrong text password raises PasswordError."""
        encrypted = resources / 'graph-encrypted.pdf'
        with pytest.raises(PasswordError):
            Pdf.open(encrypted, password='wrong')

    def test_open_pdf_password_encoding(self, resources):
        """A bytes password that is not the right one raises PasswordError."""
        encrypted = resources / 'graph-encrypted.pdf'
        with pytest.raises(PasswordError):
            Pdf.open(encrypted, password=b'\x01\xfe')

    def test_open_pdf_no_password_but_needed(self, resources):
        """Omitting the password entirely raises PasswordError."""
        encrypted = resources / 'graph-encrypted.pdf'
        with pytest.raises(PasswordError):
            Pdf.open(encrypted)


class TestPermissions:
    """Tests for the permission flags exposed through ``Pdf.allow``."""

    def test_some_permissions_missing(self, resources):
        """The encrypted sample denies high-res printing and annotation edits."""
        encrypted = Pdf.open(resources / 'graph-encrypted.pdf', 'owner')
        high_res = encrypted.allow.print_highres
        annotate = encrypted.allow.modify_annotation
        # Chained comparison kept on purpose: both flags must compare
        # equal to each other and to False.
        assert high_res == annotate == False

    def test_permissions_all_true_not_encrypted(self, trivial):
        """Every permission value on the unencrypted sample is truthy."""
        permissions = trivial.allow.values()
        assert all(permissions)


class TestStreams:
    """Tests for opening from and saving to file-like objects."""

    def test_stream(self, resources):
        """A PDF can be opened from a binary file object."""
        source = resources / 'pal-1bit-trivial.pdf'
        with source.open('rb') as binary_stream:
            opened = Pdf.open(binary_stream)
        # The page count is read after the stream has been closed.
        assert opened.root.Pages.Count == 1

    def test_no_text_stream(self, resources):
        """A file object opened in text mode is rejected with TypeError."""
        source = resources / 'pal-1bit-trivial.pdf'
        with pytest.raises(TypeError):
            with source.open('r') as text_stream:
                Pdf.open(text_stream)

    def test_save_stream(self, trivial, outdir):
        """Saving to a BytesIO gives the same bytes as saving to a path."""
        from io import BytesIO

        on_disk = outdir / 'nostream.pdf'
        trivial.save(on_disk, static_id=True)

        in_memory = BytesIO()
        trivial.save(in_memory, static_id=True)

        # static_id=True is passed to both saves; the outputs are then
        # compared byte for byte.
        assert on_disk.read_bytes() == in_memory.getvalue()


class TestMemory:
    """Tests for handing in-memory data to ``Pdf.open``."""

    def test_memory(self, resources):
        """Passing the raw bytes of a PDF to Pdf.open raises an exception."""
        raw_bytes = (resources / 'pal-1bit-trivial.pdf').read_bytes()
        with pytest.raises(Exception):
            Pdf.open(raw_bytes)


def test_remove_unreferenced(resources, outdir):
    """Removing unreferenced resources makes the saved file smaller."""
    before_path = outdir / 'out1.pdf'
    after_path = outdir / 'out2.pdf'

    pdf = Pdf.open(resources / 'sandwich.pdf')
    # Replace the first page's content stream with a single space before
    # the first save.
    pdf.pages[0].Contents = Stream(pdf, b' ')
    pdf.save(before_path)

    pdf.remove_unreferenced_resources()
    pdf.save(after_path)

    size_before = before_path.stat().st_size
    size_after = after_path.stat().st_size
    assert size_after < size_before


def test_show_xref(trivial):
    """Smoke test: show_xref_table() can be called on the sample PDF."""
    pdf = trivial
    pdf.show_xref_table()


@pytest.mark.skipif(sys.version_info < (3, 6), reason='missing mock.assert_called')
def test_progress(trivial, outdir):
    """The callable passed as ``progress`` to save() is called."""
    progress_callback = Mock()
    trivial.save(outdir / 'out.pdf', progress=progress_callback)
    progress_callback.assert_called()


def test_unicode_filename(resources, outdir):
    """PDFs can be opened from and saved to non-ASCII filenames."""
    copied = outdir / '测试.pdf'
    resaved = outdir / '通过考试.pdf'

    # Paths go through fspath() before being given to shutil.copy.
    original = fspath(resources / 'pal-1bit-trivial.pdf')
    shutil.copy(original, fspath(copied))

    Pdf.open(copied).save(resaved)
    assert resaved.exists()


@pytest.mark.skipif(os.name == 'nt', reason='os.dup hackery not supported')
def test_fileno_fails(resources):
    """Pdf.open surfaces failures of os.dup.

    With os.dup patched to raise OSError, opening raises OSError. With it
    patched to return -1, opening raises RuntimeError.
    """
    sample = resources / 'pal-1bit-trivial.pdf'

    with patch('os.dup', side_effect=OSError('assume dup fails')):
        with pytest.raises(OSError):
            Pdf.open(sample)

    with patch('os.dup', return_value=-1):
        with pytest.raises(RuntimeError):
            Pdf.open(sample)


def test_min_and_force_version(trivial, outdir):
    """Check the ``min_version`` and ``force_version`` options of save().

    Three cases are covered:

    * ``min_version='1.7'`` produces a file whose ``pdf_version`` is '1.7'.
    * A ``min_version`` that is not a version string raises RuntimeError.
    * ``force_version='1.2'`` produces a file whose ``pdf_version`` is '1.2'.
    """
    pdf = trivial

    pdf.save(outdir / '1.7.pdf', min_version='1.7')
    pdf17 = Pdf.open(outdir / '1.7.pdf')
    assert pdf17.pdf_version == '1.7'

    # Target a path under outdir, like every other save in this module, so
    # a failed save cannot leave a stray file in the working directory.
    with pytest.raises(RuntimeError):
        pdf.save(outdir / 'notaversion.pdf', min_version='foo')

    pdf.save(outdir / '1.2.pdf', force_version='1.2')
    pdf12 = Pdf.open(outdir / '1.2.pdf')
    assert pdf12.pdf_version == '1.2'


def test_normalize_linearize(trivial, outdir):
    """Asking for linearize and normalize_content together raises ValueError."""
    destination = outdir / 'no.pdf'
    with pytest.raises(ValueError):
        trivial.save(destination, linearize=True, normalize_content=True)


def test_make_stream(trivial, outdir):
    """A stream from make_stream() can become page contents and be saved."""
    content_stream = trivial.make_stream(b'q Q')
    trivial.pages[0].Contents = content_stream
    trivial.save(outdir / 's.pdf')


def test_add_blank_page(trivial):
    """add_blank_page rejects the listed sizes and appends with defaults."""
    assert len(trivial.pages) == 1

    # Each value is used as both elements of a square page_size and must
    # be rejected with ValueError.
    for bad_side in (-1, 0, 2, 15000):
        with pytest.raises(ValueError):
            trivial.add_blank_page(page_size=(bad_side, bad_side))

    trivial.add_blank_page()
    assert len(trivial.pages) == 2


def test_object_stream_mode_generated(trivial, outdir):
    """ObjectStreamMode.generate writes '/ObjStm' into the saved file.

    The check runs twice, with fix_metadata_version set to True and then
    to False.
    """
    cases = (('1.pdf', True), ('2.pdf', False))
    for filename, fix_version in cases:
        saved_path = outdir / filename
        trivial.save(
            saved_path,
            fix_metadata_version=fix_version,
            object_stream_mode=pikepdf.ObjectStreamMode.generate,
        )
        assert b'/ObjStm' in saved_path.read_bytes()


def test_with_block(resources):
    """After the ``with`` block exits, ``pdf.filename`` has changed."""
    sample = resources / 'pal-1bit-trivial.pdf'
    with pikepdf.open(sample) as pdf:
        name_while_open = pdf.filename
    # The filename read after the block must differ from the one captured
    # inside it.
    assert pdf.filename != name_while_open


def test_with_block_abuse(resources):
    """Reading an object after its Pdf's ``with`` block exits raises PdfError."""
    sample = resources / 'pal-1bit-trivial.pdf'
    with pikepdf.open(sample) as pdf:
        image = pdf.pages[0].Resources.XObject['/Im0']

    with pytest.raises(PdfError):
        image.read_bytes()