1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
|
# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
'''
This module contains code to edit pages. Sort of a canvas, I
suppose, but I wouldn't want to call it that and get people all
excited or anything.
No, this is just for doing basic things like merging/splitting
apart pages, watermarking, etc. All it does is allow converting
pages (or parts of pages) into Form XObject rectangles, and then
plopping those down on new or pre-existing pages.
'''
from .objects import PdfDict, PdfArray, PdfName
from .buildxobj import pagexobj, ViewInfo
# Shared default ViewInfo instance.  RectXObj.__init__ compares its viewinfo
# argument against this object by identity to tell whether the caller
# supplied a ViewInfo of their own.
NullInfo = ViewInfo()
class RectXObj(PdfDict):
    ''' This class facilitates doing positioning (moving and scaling)
        of Form XObjects within their containing page, by modifying
        the Form XObject's transformation matrix.

        By default, this class keeps the aspect ratio locked.  For
        example, if your object is foo, you can write 'foo.w = 200',
        and it will scale in both the x and y directions.

        To unlock the aspect ratio, you have to do a tiny bit of math
        and call the scale function.
    '''

    def __init__(self, page, viewinfo=NullInfo, **kw):
        ''' The page is a page returned by PdfReader.  It will be
            turned into a cached Form XObject (so that multiple
            rectangles can be extracted from it if desired), and then
            another Form XObject will be built using it and the viewinfo
            (which should be a ViewInfo class).  The viewinfo includes
            source coordinates (from the top/left) and rotation information.

            Keyword arguments, if any, are used to build a fresh
            ViewInfo; they may not be combined with an explicit
            viewinfo (ValueError is raised in that case).

            Once the object has been built, its destination coordinates
            may be examined and manipulated by using x, y, w, h, and
            scale.  The destination coordinates are in the normal
            PDF programmatic system (starting at bottom left).
        '''
        if kw:
            if viewinfo is not NullInfo:
                raise ValueError("Cannot modify preexisting ViewInfo")
            viewinfo = ViewInfo(**kw)
        # NOTE(review): presumably this keeps pagexobj from caching or
        # sharing the XObject whose matrix is edited below -- confirm
        # against buildxobj.
        viewinfo.cacheable = False
        base = pagexobj(page, viewinfo)
        self.update(base)
        self.indirect = True
        self.stream = base.stream

        # The rectangle and matrix are stored through self.private and
        # read back by the rest of this class as self._rect / self._matrix.
        private = self.private
        private._rect = [base.x, base.y, base.w, base.h]
        matrix = self.Matrix
        if matrix is None:
            matrix = self.Matrix = PdfArray((1, 0, 0, 1, 0, 0))
        private._matrix = matrix  # Lookup optimization

        # Default to lower-left corner.  These assignments go through
        # __setattr__ below, which also shifts the matrix translation.
        self.x = 0
        self.y = 0

    @property
    def x(self):
        ''' X location (from left) of object in points
        '''
        return self._rect[0]

    @property
    def y(self):
        ''' Y location (from bottom) of object in points
        '''
        return self._rect[1]

    @property
    def w(self):
        ''' Width of object in points
        '''
        return self._rect[2]

    @property
    def h(self):
        ''' Height of object in points
        '''
        return self._rect[3]

    def __setattr__(self, name, value, next=PdfDict.__setattr__,
                    mine=set('x y w h'.split())):
        ''' The underlying __setitem__ won't let us use a property
            setter, so we have to fake one.

            Assigning x or y moves the object: the stored rectangle
            is updated and the matrix translation is shifted by the
            same amount.  Assigning w or h rescales the object
            uniformly in both directions so that the named dimension
            takes the requested value.  Every other attribute name is
            handed to PdfDict.__setattr__ unchanged.
        '''
        if name not in mine:
            return next(self, name, value)
        if name in 'xy':
            r_index, m_index = (0, 4) if name == 'x' else (1, 5)
            old = self._rect[r_index]
            self._rect[r_index] = value
            self._matrix[m_index] += value - old
        else:
            # Pick the dimension being assigned: width is _rect[2],
            # height is _rect[3].  This must test the attribute *name*;
            # testing the value would always select the width.
            index = 3 if name == 'h' else 2
            # 1.0 * keeps this a true division when both are ints.
            self.scale(1.0 * value / self._rect[index])

    def scale(self, x_scale, y_scale=None):
        ''' Multiply the object's size by x_scale horizontally and
            y_scale vertically.  If y_scale is omitted it defaults to
            x_scale, which keeps the aspect ratio.

            Current scaling deals properly with things that
            have been rotated in 90 degree increments
            (via the ViewInfo given when instantiating).
        '''
        if y_scale is None:
            y_scale = x_scale

        x, y, w, h = rect = self._rect
        ao, bo, co, do, eo, fo = matrix = self._matrix

        # a and c feed the output x coordinate, b and d the output y
        # coordinate, so they pick up the matching scale factor.
        an = ao * x_scale
        bn = bo * y_scale
        cn = co * x_scale
        dn = do * y_scale
        # Scale the translation's distance from the rectangle origin by
        # the ratio of new to old coefficients, keeping (x, y) fixed.
        en = x + (eo - x) * 1.0 * (an + cn) / (ao + co)
        fn = y + (fo - y) * 1.0 * (bn + dn) / (bo + do)

        # Update both lists in place so existing references stay valid.
        matrix[:] = an, bn, cn, dn, en, fn
        rect[:] = x, y, w * x_scale, h * y_scale

    @property
    def box(self):
        ''' Return the bounding box for the object
            as a PdfArray of [left, bottom, right, top].
        '''
        x, y, w, h = self._rect
        return PdfArray([x, y, x + w, y + h])
class PageMerge(list):
    ''' A PageMerge object can have 0 or 1 underlying pages
        (that get edited with the results of the merge)
        and 0-n RectXObjs that can be applied before or
        after the underlying page.

        The list holds the RectXObjs in drawing order.  When an
        underlying page has been set, a single None entry marks the
        position of that page's own contents within the order.
    '''

    # Defaults used when no underlying page has been set.
    page = None
    mbox = None
    cbox = None
    resources = None
    rotate = None
    contents = None

    def __init__(self, page=None):
        ''' Create the merge object, optionally on top of an
            existing page (see setpage).
        '''
        if page is not None:
            self.setpage(page)

    def setpage(self, page):
        ''' Use page as the underlying page that render() will edit.

            Raises TypeError if the object's Type is not /Page.
        '''
        if page.Type != PdfName.Page:
            raise TypeError("Expected page")
        self.append(None)  # Placeholder
        self.page = page
        inheritable = page.inheritable
        self.mbox = inheritable.MediaBox
        self.cbox = inheritable.CropBox
        self.resources = inheritable.Resources
        self.rotate = inheritable.Rotate
        self.contents = page.Contents

    def __add__(self, other):
        ''' Add a single object (anything that is a dict) or every
            object of an iterable, via add().

            Unlike list.__add__, this modifies self in place and
            returns self.
        '''
        items = [other] if isinstance(other, dict) else other
        for item in items:
            self.add(item)
        return self

    def add(self, obj, prepend=False, **kw):
        ''' Add obj to the merge and return self.

            If keyword arguments are given, or if obj is a page, it
            is first wrapped in a RectXObj (the keyword arguments are
            passed along to RectXObj).  Anything else is stored as is.
            With prepend=True the object goes to the front of the
            list instead of the end.
        '''
        if kw:
            obj = RectXObj(obj, **kw)
        elif obj.Type == PdfName.Page:
            obj = RectXObj(obj)
        if prepend:
            self.insert(0, obj)
        else:
            self.append(obj)
        return self

    @staticmethod
    def _key_offset(existing_keys, prefix='/pdfrw_'):
        ''' Return the number which, added to the current size of the
            XObject dictionary, gives the first unused pdfrw_<n> index.

            existing_keys are the names already in the dictionary.
            Only names made of the prefix followed by digits count as
            used indices.
        '''
        existing_keys = list(existing_keys)
        # Slice by the real prefix length; a shorter slice would keep
        # the underscore and make isdigit() fail for every key.
        skip = len(prefix)
        used = [int(key[skip:]) for key in existing_keys
                if key.startswith(prefix) and key[skip:].isdigit()]
        first_free = max(used) + 1 if used else 0
        return first_free - len(existing_keys)

    def render(self):
        ''' Build and return the merged page.

            Each object in the list is registered in the page
            resources' XObject dictionary under a pdfrw_<n> name and
            drawn with a "Do" operator.  Objects before the
            placeholder are drawn before the underlying page's
            contents, the others after.  If no page was set, a new
            page dictionary is created; if there is no MediaBox, one
            is derived from the objects' bounding box.

            Raises KeyError if a generated XObject name is already
            taken by a different object.
        '''
        def do_xobjs(xobj_list):
            # Register each object and emit one content stream that
            # draws them in order.
            content = []
            for obj in xobj_list:
                index = PdfName('pdfrw_%d' % (key_offset + len(xobjs)))
                if xobjs.setdefault(index, obj) is not obj:
                    raise KeyError("XObj key %s already in use" % index)
                content.append('%s Do' % index)
            return PdfDict(indirect=True, stream='\n'.join(content))

        mbox = self.mbox
        cbox = self.cbox
        page = self.page
        old_contents = self.contents
        resources = self.resources or PdfDict()

        xobjs = resources.XObject
        if xobjs is None:
            xobjs = resources.XObject = PdfDict()
            key_offset = 0
        else:
            key_offset = self._key_offset(xobjs.keys())

        if old_contents is None:
            # Skip the page placeholder (present when the underlying
            # page has no Contents); it is not a drawable object.
            new_contents = do_xobjs([obj for obj in self if obj is not None])
        else:
            if isinstance(old_contents, PdfDict):
                old_contents = [old_contents]
            new_contents = PdfArray()
            index = self.index(None)
            if index:
                new_contents.append(do_xobjs(self[:index]))
            new_contents.extend(old_contents)
            index += 1
            if index < len(self):
                new_contents.append(do_xobjs(self[index:]))

        if mbox is None:
            # No MediaBox to start from: enclose all objects, and
            # make sure the box reaches the origin.
            cbox = None
            mbox = self.xobj_box
            mbox[0] = min(0, mbox[0])
            mbox[1] = min(0, mbox[1])

        page = PdfDict(indirect=True) if page is None else page
        page.Type = PdfName.Page
        page.Resources = resources
        page.MediaBox = mbox
        page.CropBox = cbox
        page.Rotate = self.rotate
        page.Contents = new_contents
        return page

    @property
    def xobj_box(self):
        ''' Return the smallest box that encloses every object
            in the list.

            The placeholder for the underlying page is ignored.
            Raises ValueError if there are no objects.
        '''
        boxes = [xobj.box for xobj in self if xobj is not None]
        if not boxes:
            raise ValueError("No objects to compute a bounding box from")
        a, b, c, d = zip(*boxes)
        return PdfArray((min(a), min(b), max(c), max(d)))
|