diff options
Diffstat (limited to 'examples/rl2/decodegraphics.py')
-rw-r--r-- | examples/rl2/decodegraphics.py | 114 |
1 files changed, 90 insertions, 24 deletions
diff --git a/examples/rl2/decodegraphics.py b/examples/rl2/decodegraphics.py index 9fa5a05..e2f3a9f 100644 --- a/examples/rl2/decodegraphics.py +++ b/examples/rl2/decodegraphics.py @@ -1,4 +1,4 @@ -# A part of pdfrw (pdfrw.googlecode.com) +# A part of pdfrw (https://github.com/pmaupin/pdfrw) # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas # MIT license -- See LICENSE.txt for details @@ -13,13 +13,13 @@ Better to use Form XObjects for most things (see the example in rl1). ''' from inspect import getargspec -import find_pdfrw from pdfrw import PdfTokens -from pdfrw.pdfobjects import PdfString +from pdfrw.objects import PdfString ############################################################################# # Graphics parsing + def parse_array(self, token='[', params=None): mylist = [] for token in self.tokens: @@ -28,105 +28,134 @@ def parse_array(self, token='[', params=None): mylist.append(token) self.params.append(mylist) + def parse_savestate(self, token='q', params=''): self.canv.saveState() + def parse_restorestate(self, token='Q', params=''): self.canv.restoreState() + def parse_transform(self, token='cm', params='ffffff'): self.canv.transform(*params) + def parse_linewidth(self, token='w', params='f'): self.canv.setLineWidth(*params) + def parse_linecap(self, token='J', params='i'): self.canv.setLineCap(*params) + def parse_linejoin(self, token='j', params='i'): self.canv.setLineJoin(*params) + def parse_miterlimit(self, token='M', params='f'): self.canv.setMiterLimit(*params) + def parse_dash(self, token='d', params='as'): # Array, string self.canv.setDash(*params) + def parse_intent(self, token='ri', params='n'): # TODO: add logging pass + def parse_flatness(self, token='i', params='i'): # TODO: add logging pass + def parse_gstate(self, token='gs', params='n'): # TODO: add logging # Could parse stuff we care about from here later pass + def parse_move(self, token='m', params='ff'): if self.gpath is None: self.gpath = self.canv.beginPath() self.gpath.moveTo(*params) self.current_point = params + def parse_line(self, token='l', params='ff'): self.gpath.lineTo(*params) self.current_point = params + def parse_curve(self, token='c', params='ffffff'): self.gpath.curveTo(*params) self.current_point = params[-2:] + def parse_curve1(self, token='v', params='ffff'): parse_curve(self, token, tuple(self.current_point) + tuple(params)) + def parse_curve2(self, token='y', params='ffff'): parse_curve(self, token, tuple(params) + tuple(params[-2:])) + def parse_close(self, token='h', params=''): self.gpath.close() + def parse_rect(self, token='re', params='ffff'): if self.gpath is None: self.gpath = self.canv.beginPath() self.gpath.rect(*params) self.current_point = params[-2:] + def parse_stroke(self, token='S', params=''): finish_path(self, 1, 0, 0) + def parse_close_stroke(self, token='s', params=''): self.gpath.close() finish_path(self, 1, 0, 0) + def parse_fill(self, token='f', params=''): finish_path(self, 0, 1, 1) + def parse_fill_compat(self, token='F', params=''): finish_path(self, 0, 1, 1) + def parse_fill_even_odd(self, token='f*', params=''): finish_path(self, 0, 1, 0) + def parse_fill_stroke_even_odd(self, token='B*', params=''): finish_path(self, 1, 1, 0) + def parse_fill_stroke(self, token='B', params=''): finish_path(self, 1, 1, 1) + def parse_close_fill_stroke_even_odd(self, token='b*', params=''): self.gpath.close() finish_path(self, 1, 1, 0) + def parse_close_fill_stroke(self, token='b', params=''): self.gpath.close() finish_path(self, 1, 1, 1) + def parse_nop(self, token='n', params=''): finish_path(self, 0, 0, 0) + def finish_path(self, stroke, fill, fillmode): if self.gpath is not None: canv = self.canv @@ -135,39 +164,49 @@ def finish_path(self, stroke, fill, fillmode): canv._fillMode = oldmode self.gpath = None + def parse_clip_path(self, token='W', params=''): # TODO: add logging pass + def parse_clip_path_even_odd(self, token='W*', params=''): # TODO: add logging pass + def parse_stroke_gray(self, token='G', params='f'): self.canv.setStrokeGray(*params) + def parse_fill_gray(self, token='g', params='f'): self.canv.setFillGray(*params) + def parse_stroke_rgb(self, token='RG', params='fff'): self.canv.setStrokeColorRGB(*params) + def parse_fill_rgb(self, token='rg', params='fff'): self.canv.setFillColorRGB(*params) + def parse_stroke_cmyk(self, token='K', params='ffff'): self.canv.setStrokeColorCMYK(*params) + def parse_fill_cmyk(self, token='k', params='ffff'): self.canv.setFillColorCMYK(*params) ############################################################################# # Text parsing + def parse_begin_text(self, token='BT', params=''): assert self.tpath is None self.tpath = self.canv.beginText() + def parse_text_transform(self, token='Tm', params='ffffff'): path = self.tpath @@ -177,20 +216,23 @@ def parse_text_transform(self, token='Tm', params='ffffff'): except AttributeError: pass else: - if code[-1] == '1 0 0 1 0 0 Tm': + if code[-1] == '1 0 0 1 0 0 Tm': code.pop() path.setTextTransform(*params) + def parse_setfont(self, token='Tf', params='nf'): fontinfo = self.fontdict[params[0]] self.tpath._setFont(fontinfo.name, params[1]) self.curfont = fontinfo + def parse_text_out(self, token='Tj', params='t'): text = params[0].decode(self.curfont.remap, self.curfont.twobyte) self.tpath.textOut(text) + def parse_TJ(self, token='TJ', params='a'): remap = self.curfont.remap twobyte = self.curfont.twobyte @@ -204,41 +246,52 @@ def parse_TJ(self, token='TJ', params='a'): text = ''.join(result) self.tpath.textOut(text) + def parse_end_text(self, token='ET', params=''): assert self.tpath is not None self.canv.drawText(self.tpath) - self.tpath=None + self.tpath = None + def parse_move_cursor(self, token='Td', params='ff'): self.tpath.moveCursor(params[0], -params[1]) + def parse_set_leading(self, token='TL', params='f'): self.tpath.setLeading(*params) + def parse_text_line(self, token='T*', params=''): self.tpath.textLine() + def parse_set_char_space(self, token='Tc', params='f'): self.tpath.setCharSpace(*params) + def parse_set_word_space(self, token='Tw', params='f'): self.tpath.setWordSpace(*params) + def parse_set_hscale(self, token='Tz', params='f'): self.tpath.setHorizScale(params[0] - 100) + def parse_set_rise(self, token='Ts', params='f'): self.tpath.setRise(*params) + def parse_xobject(self, token='Do', params='n'): # TODO: Need to do this pass + class FontInfo(object): ''' Pretty basic -- needs a lot of work to work right for all fonts ''' lookup = { - 'BitstreamVeraSans' : 'Helvetica', # WRONG -- have to learn about font stuff... + # WRONG -- have to learn about font stuff... + 'BitstreamVeraSans': 'Helvetica', } def __init__(self, source): @@ -254,16 +307,19 @@ class FontInfo(object): assert not len(info) & 1 info2 = [] for x in info: - assert x[0] == '<' and x[-1] == '>' and len(x) in (4,6), x + assert x[0] == '<' and x[-1] == '>' and len(x) in (4, 6), x i = int(x[1:-1], 16) info2.append(i) - self.remap = dict((x,chr(y)) for (x,y) in zip(info2[::2], info2[1::2])).get + self.remap = dict((x, chr(y)) for (x, y) in + zip(info2[::2], info2[1::2])).get self.twobyte = len(info[0]) > 4 ############################################################################# # Control structures + def findparsefuncs(): + def checkname(n): assert n.startswith('/') return n @@ -276,8 +332,10 @@ def findparsefuncs(): assert isinstance(t, PdfString) return t - fixparam = dict(f=float, i=int, n=checkname, a=checkarray, s=str, t=checktext) + fixparam = dict(f=float, i=int, n=checkname, a=checkarray, + s=str, t=checktext) fixcache = {} + def fixlist(params): try: result = fixcache[params] @@ -288,12 +346,12 @@ def findparsefuncs(): dispatch = {} expected_args = 'self token params'.split() - for key, func in globals().iteritems(): + for key, func in globals().items(): if key.startswith('parse_'): args, varargs, keywords, defaults = getargspec(func) - assert args == expected_args and varargs is None \ - and keywords is None and len(defaults) == 2, \ - (key, args, varargs, keywords, defaults) + assert (args == expected_args and varargs is None and + keywords is None and len(defaults) == 2), ( + key, args, varargs, keywords, defaults) token, params = defaults if params is not None: params = fixlist(params) @@ -301,6 +359,7 @@ def findparsefuncs(): assert dispatch.setdefault(token, value) is value, repr(token) return dispatch + class _ParseClass(object): dispatch = findparsefuncs() @@ -309,14 +368,16 @@ class _ParseClass(object): self = cls() contents = page.Contents if contents.Filter is not None: - raise SystemExit('Cannot parse graphics -- page encoded with %s' % contents.Filter) + raise SystemExit('Cannot parse graphics -- page encoded with %s' + % contents.Filter) dispatch = cls.dispatch.get self.tokens = tokens = iter(PdfTokens(contents.stream)) self.params = params = [] self.canv = canvas self.gpath = None self.tpath = None - self.fontdict = dict((x,FontInfo(y)) for (x, y) in page.Resources.Font.iteritems()) + self.fontdict = dict((x, FontInfo(y)) for + (x, y) in page.Resources.Font.iteritems()) for token in self.tokens: info = dispatch(token) @@ -330,35 +391,40 @@ class _ParseClass(object): delta = len(params) - len(paraminfo) if delta: if delta < 0: - print 'Operator %s expected %s parameters, got %s' % (token, len(paraminfo), params) + print ('Operator %s expected %s parameters, got %s' % + (token, len(paraminfo), params)) params[:] = [] continue else: - print "Unparsed parameters/commands:", params[:delta] + print ("Unparsed parameters/commands: %s" % params[:delta]) del params[:delta] paraminfo = zip(paraminfo, params) try: - params[:] = [x(y) for (x,y) in paraminfo] + params[:] = [x(y) for (x, y) in paraminfo] except: - for i, (x,y) in enumerate(paraminfo): + for i, (x, y) in enumerate(paraminfo): try: x(y) except: - raise # For now + raise # For now continue func(self, token, params) params[:] = [] -def debugparser(undisturbed = set('parse_array'.split())): + +def debugparser(undisturbed=set('parse_array'.split())): def debugdispatch(): def getvalue(oldval): name = oldval[0].__name__ + def myfunc(self, token, params): - print '%s called %s(%s)' % (token, name, ', '.join(str(x) for x in params)) + print ('%s called %s(%s)' % (token, name, + ', '.join(str(x) for x in params))) if name in undisturbed: myfunc = oldval[0] return myfunc, oldval[1] - return dict((x, getvalue(y)) for (x,y) in _ParseClass.dispatch.iteritems()) + return dict((x, getvalue(y)) + for (x, y) in _ParseClass.dispatch.iteritems()) class _DebugParse(_ParseClass): dispatch = debugdispatch() @@ -374,5 +440,5 @@ if __name__ == '__main__': fname, = sys.argv[1:] pdf = PdfReader(fname) for i, page in enumerate(pdf.pages): - print '\nPage %s ------------------------------------' % i + print ('\nPage %s ------------------------------------' % i) parse(page) |