# pdfrw/toreportlab.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

'''
Converts pdfrw objects into reportlab objects.

Designed for and tested with rl 2.3.

Knows too much about reportlab internals.
What can you do?

The interface to this module is through the makerl() function.

Parameters:
        canv       - a reportlab "canvas" (also accepts a "document")
        pdfobj      - a pdfrw PDF object

Returns:
        A corresponding reportlab object, or if the
        object is a PDF Form XObject, the name to
        use with reportlab for the object.

        Will recursively convert all necessary objects.
        Be careful when converting a page -- if /Parent is set,
        will recursively convert all pages!

Notes:
    1) Original objects are annotated with a
        derived_rl_obj attribute which points to the
        reportlab object.  This keeps multiple reportlab
        objects from being generated for the same pdfobj
        via repeated calls to makerl.  This is great for
        not putting too many objects into the
        new PDF, but not so good if you are modifying
        objects for different pages.  Then you
        need to do your own deep copying (of circular
        structures).  You're on your own.

    2) ReportLab seems weird about FormXObjects.
       They pass around a partial name instead of the
       object or a reference to it.  So we have to
       reach into reportlab and get a number for
       a unique name.  I guess this is to make it
       where you can combine page streams with
       impunity, but that's just a guess.

    3) Updated 1/23/2010 to handle multipass documents
       (e.g. with a table of contents).  These have
       a different doc object on every pass.

'''

from reportlab.pdfbase import pdfdoc as rldocmodule
from .objects import PdfDict, PdfArray, PdfName
from .py23_diffs import convert_store

# Short module-level aliases for the reportlab pdfdoc classes that the
# converter functions in this file instantiate.
RLStream = rldocmodule.PDFStream
RLDict = rldocmodule.PDFDictionary
RLArray = rldocmodule.PDFArray


def _makedict(rldoc, pdfobj):
    '''Convert a stream-less pdfrw dictionary into a reportlab dictionary.

    Parameters:
        rldoc  - the reportlab document the new object belongs to
        pdfobj - the pdfrw dictionary being converted

    Returns:
        The new RLDict itself for a direct object, or the result of
        rldoc.Reference() wrapping it when pdfobj.indirect is true.
    '''
    target = RLDict()
    if pdfobj.indirect:
        # NOTE(review): __RefOnly__ is a reportlab-internal marker;
        # presumably it makes the object emit only through its reference.
        target.__RefOnly__ = 1
        result = rldoc.Reference(target)
    else:
        result = target

    # Record the result before descending, so that a child which points
    # back at this object gets the cached entry from makerl_recurse
    # instead of triggering another conversion.
    pdfobj.derived_rl_obj[rldoc] = result, None

    for name, child in pdfobj.iteritems():
        # The first character of each key is dropped (presumably the
        # leading '/' of a PDF name -- confirm against reportlab).
        target[name[1:]] = makerl_recurse(rldoc, child)

    return result


def _makestream(rldoc, pdfobj, xobjtype=PdfName.XObject):
    '''Convert a pdfrw dictionary that carries a stream into a reportlab
    stream object.

    Parameters:
        rldoc    - the reportlab document the new object belongs to
        pdfobj   - the pdfrw dictionary (with a non-None .stream)
        xobjtype - the /Type value that marks the object as an XObject

    Returns:
        The rldoc.Reference() for the new stream.  When pdfobj.Type equals
        xobjtype, a short name of the form 'pdfrw_<n>' is also stored in
        the cache entry next to the reference.
    '''
    header = RLDict()
    stream = RLStream(header, convert_store(pdfobj.stream))

    shortname = fullname = None
    if pdfobj.Type == xobjtype:
        # The name is derived from the document's object counter.
        shortname = 'pdfrw_%s' % (rldoc.objectcounter + 1)
        fullname = rldoc.getXObjectName(shortname)

    reference = rldoc.Reference(stream, fullname)

    # Cache before descending so that circular structures resolve to
    # this reference via makerl_recurse.
    pdfobj.derived_rl_obj[rldoc] = reference, shortname

    for name, child in pdfobj.iteritems():
        # Keys are stored without their first character (presumably the
        # leading '/' of a PDF name -- confirm against reportlab).
        header[name[1:]] = makerl_recurse(rldoc, child)

    return reference


def _makearray(rldoc, pdfobj):
    '''Convert a pdfrw array into a reportlab array.

    Parameters:
        rldoc  - the reportlab document the new object belongs to
        pdfobj - the pdfrw array being converted

    Returns:
        The new RLArray itself for a direct object, or the result of
        rldoc.Reference() wrapping it when pdfobj.indirect is true.
    '''
    target = RLArray([])
    if pdfobj.indirect:
        # NOTE(review): __RefOnly__ is a reportlab-internal marker;
        # presumably it makes the object emit only through its reference.
        target.__RefOnly__ = 1
        result = rldoc.Reference(target)
    else:
        result = target

    # Cache first: elements that refer back to this array must find the
    # entry when makerl_recurse looks them up.
    pdfobj.derived_rl_obj[rldoc] = result, None

    # Fill the reportlab array's backing list one element at a time.
    add_item = target.sequence.append
    for item in pdfobj:
        add_item(makerl_recurse(rldoc, item))

    return result


def _makestr(rldoc, pdfobj):
    assert isinstance(pdfobj, (float, int, str)), repr(pdfobj)
    # TODO: Add fix for float like in pdfwriter
    return str(getattr(pdfobj, 'encoded', pdfobj))


def makerl_recurse(rldoc, pdfobj):
    '''Convert pdfobj for rldoc, reusing an earlier conversion if any.

    Parameters:
        rldoc  - the reportlab document objects are being created for
        pdfobj - a pdfrw dictionary, array, or scalar value

    Returns:
        The first element of the cached (object, name) entry for rldoc
        when one exists; otherwise whatever the matching converter
        (_makestream, _makedict, _makearray or _makestr) returns.
    '''
    cache = getattr(pdfobj, 'derived_rl_obj', None)
    if cache is not None:
        hit = cache.get(rldoc)
        if hit is not None:
            # Already converted for this document.
            return hit[0]

    if isinstance(pdfobj, PdfDict):
        # A dictionary with stream data is converted as a stream.
        converter = _makedict if pdfobj.stream is None else _makestream
        if cache is None:
            # For dictionaries the per-document cache is attached
            # through the object's .private namespace.
            pdfobj.private.derived_rl_obj = {}
    elif isinstance(pdfobj, PdfArray):
        converter = _makearray
        if cache is None:
            pdfobj.derived_rl_obj = {}
    else:
        # Anything else is treated as a scalar and never cached.
        converter = _makestr

    return converter(rldoc, pdfobj)


def makerl(canv, pdfobj):
    '''Public entry point: convert a pdfrw object for use with reportlab.

    Parameters:
        canv   - an object with a _doc attribute (a reportlab canvas),
                 or the document itself when there is no such attribute
        pdfobj - the pdfrw object to convert

    Returns:
        The name stored in pdfobj's cache entry for this document when
        that name is truthy; otherwise the converted reportlab object.
    '''
    # Fall back to treating canv as the document when it has no _doc.
    rldoc = getattr(canv, '_doc', canv)
    converted = makerl_recurse(rldoc, pdfobj)
    try:
        shortname = pdfobj.derived_rl_obj[rldoc][1]
    except AttributeError:
        # No cache attribute on this object (e.g. a scalar value).
        return converted
    return shortname or converted