pdfrw/pagemerge.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242

# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

'''
This module contains code to edit pages.  Sort of a canvas, I
suppose, but I wouldn't want to call it that and get people all
excited or anything.

No, this is just for doing basic things like merging/splitting
apart pages, watermarking, etc.  All it does is allow converting
pages (or parts of pages) into Form XObject rectangles, and then
plopping those down on new or pre-existing pages.
'''

from .objects import PdfDict, PdfArray, PdfName
from .buildxobj import pagexobj, ViewInfo

NullInfo = ViewInfo()


class RectXObj(PdfDict):
    ''' This class facilitates doing positioning (moving and scaling)
        of Form XObjects within their containing page, by modifying
        the Form XObject's transformation matrix.

        By default, this class keeps the aspect ratio locked.  For
        example, if your object is foo, you can write 'foo.w = 200',
        and it will scale in both the x and y directions.

        To unlock the aspect ration, you have to do a tiny bit of math
        and call the scale function.
    '''
    def __init__(self, page, viewinfo=NullInfo, **kw):
        ''' The page is a page returned by PdfReader.  It will be
            turned into a cached Form XObject (so that multiple
            rectangles can be extracted from it if desired), and then
            another Form XObject will be built using it and the viewinfo
            (which should be a ViewInfo class).  The viewinfo includes
            source coordinates (from the top/left) and rotation information.

            Once the object has been built, its destination coordinates
            may be examined and manipulated by using x, y, w, h, and
            scale.  The destination coordinates are in the normal
            PDF programmatic system (starting at bottom left).
        '''
        if kw:
            if viewinfo is not NullInfo:
                raise ValueError("Cannot modify preexisting ViewInfo")
            viewinfo = ViewInfo(**kw)
        viewinfo.cacheable = False
        base = pagexobj(page, viewinfo)
        self.update(base)
        self.indirect = True
        self.stream = base.stream
        private = self.private
        private._rect = [base.x, base.y, base.w, base.h]
        matrix = self.Matrix
        if matrix is None:
            matrix = self.Matrix = PdfArray((1, 0, 0, 1, 0, 0))
        private._matrix = matrix  # Lookup optimization
        # Default to lower-left corner
        self.x = 0
        self.y = 0

    @property
    def x(self):
        ''' X location (from left) of object in points
        '''
        return self._rect[0]

    @property
    def y(self):
        ''' Y location (from bottom) of object in points
        '''
        return self._rect[1]

    @property
    def w(self):
        ''' Width of object in points
        '''
        return self._rect[2]

    @property
    def h(self):
        ''' Height of object in points
        '''
        return self._rect[3]

    def __setattr__(self, name, value, next=PdfDict.__setattr__,
                    mine=set('x y w h'.split())):
        ''' The underlying __setitem__ won't let us use a property
            setter, so we have to fake one.
        '''
        if name not in mine:
            return next(self, name, value)
        if name in 'xy':
            r_index, m_index = (0, 4) if name == 'x' else (1, 5)
            self._rect[r_index], old = value, self._rect[r_index]
            self._matrix[m_index] += value - old
        else:
            index = 2 + (value == 'h')
            self.scale(value / self._rect[index])

    def scale(self, x_scale, y_scale=None):
        ''' Current scaling deals properly with things that
            have been rotated in 90 degree increments
            (via the ViewMerge object given when instantiating).
        '''
        if y_scale is None:
            y_scale = x_scale
        x, y, w, h = rect = self._rect
        ao, bo, co, do, eo, fo = matrix = self._matrix
        an = ao * x_scale
        bn = bo * y_scale
        cn = co * x_scale
        dn = do * y_scale
        en = x + (eo - x) * 1.0 * (an + cn) / (ao + co)
        fn = y + (fo - y) * 1.0 * (bn + dn) / (bo + do)
        matrix[:] = an, bn, cn, dn, en, fn
        rect[:] = x, y, w * x_scale, h * y_scale

    @property
    def box(self):
        ''' Return the bounding box for the object
        '''
        x, y, w, h = self._rect
        return PdfArray([x, y, x + w, y + h])


class PageMerge(list):
    ''' A PageMerge object can have 0 or 1 underlying pages
        (that get edited with the results of the merge)
        and 0-n RectXObjs that can be applied before or
        after the underlying page.
    '''
    page = None
    mbox = None
    cbox = None
    resources = None
    rotate = None
    contents = None

    def __init__(self, page=None):
        if page is not None:
            self.setpage(page)

    def setpage(self, page):
        if page.Type != PdfName.Page:
            raise TypeError("Expected page")
        self.append(None)  # Placeholder
        self.page = page
        inheritable = page.inheritable
        self.mbox = inheritable.MediaBox
        self.cbox = inheritable.CropBox
        self.resources = inheritable.Resources
        self.rotate = inheritable.Rotate
        self.contents = page.Contents

    def __add__(self, other):
        if isinstance(other, dict):
            other = [other]
        for other in other:
            self.add(other)
        return self

    def add(self, obj, prepend=False, **kw):
        if kw:
            obj = RectXObj(obj, **kw)
        elif obj.Type == PdfName.Page:
            obj = RectXObj(obj)
        if prepend:
            self.insert(0, obj)
        else:
            self.append(obj)
        return self

    def render(self):
        def do_xobjs(xobj_list):
            content = []
            for obj in xobj_list:
                index = PdfName('pdfrw_%d' % (key_offset + len(xobjs)))
                if xobjs.setdefault(index, obj) is not obj:
                    raise KeyError("XObj key %s already in use" % index)
                content.append('%s Do' % index)
            return PdfDict(indirect=True, stream='\n'.join(content))

        mbox = self.mbox
        cbox = self.cbox
        page = self.page
        old_contents = self.contents
        resources = self.resources or PdfDict()

        key_offset = 0
        xobjs = resources.XObject
        if xobjs is None:
            xobjs = resources.XObject = PdfDict()
        else:
            allkeys = xobjs.keys()
            if allkeys:
                keys = (x for x in allkeys if x.startswith('/pdfrw_'))
                keys = (x for x in keys if x[6:].isdigit())
                keys = sorted(keys, key=lambda x: int(x[6:]))
                key_offset = (int(keys[-1][6:]) + 1) if keys else 0
                key_offset -= len(allkeys)

        if old_contents is None:
            new_contents = do_xobjs(self)
        else:
            isdict = isinstance(old_contents, PdfDict)
            old_contents = [old_contents] if isdict else old_contents
            new_contents = PdfArray()
            index = self.index(None)
            if index:
                new_contents.append(do_xobjs(self[:index]))
            new_contents.extend(old_contents)
            index += 1
            if index < len(self):
                new_contents.append(do_xobjs(self[index:]))

        if mbox is None:
            cbox = None
            mbox = self.xobj_box
            mbox[0] = min(0, mbox[0])
            mbox[1] = min(0, mbox[1])

        page = PdfDict(indirect=True) if page is None else page
        page.Type = PdfName.Page
        page.Resources = resources
        page.MediaBox = mbox
        page.CropBox = cbox
        page.Rotate = self.rotate
        page.Contents = new_contents
        return page

    @property
    def xobj_box(self):
        ''' Return the smallest box that encloses every object
            in the list.
        '''
        a, b, c, d = zip(*(xobj.box for xobj in self))
        return PdfArray((min(a), min(b), max(c), max(d)))