summaryrefslogtreecommitdiff
path: root/pdfrw/buildxobj.py
diff options
context:
space:
mode:
authorMatthias Klose <doko@debian.org>2014-07-13 17:50:59 +0200
committerMatthias Klose <doko@debian.org>2014-07-13 17:50:59 +0200
commita1959ba9c0c9f3881c3e593e5aef1046750880f2 (patch)
treee4fc630e9e26b227d9a7e41db65d80f6158e8ae9 /pdfrw/buildxobj.py
pdfrw (0.1-3) unstable; urgency=medium
* QA upload. * Build using dh_python2 # imported from the archive
Diffstat (limited to 'pdfrw/buildxobj.py')
-rw-r--r--pdfrw/buildxobj.py249
1 files changed, 249 insertions, 0 deletions
diff --git a/pdfrw/buildxobj.py b/pdfrw/buildxobj.py
new file mode 100644
index 0000000..ba34f61
--- /dev/null
+++ b/pdfrw/buildxobj.py
@@ -0,0 +1,249 @@
+# A part of pdfrw (pdfrw.googlecode.com)
+# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
+# MIT license -- See LICENSE.txt for details
+
+'''
+
+This module contains code to build PDF "Form XObjects".
+
+A Form XObject allows a fragment from one PDF file to be cleanly
+included in another PDF file.
+
+Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
+
+ http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
+
+ Supported: 'page=xxx' and 'viewrect=<left>,<top>,<width>,<height>'
+
+ Also supported by this, but not by Adobe:
+ 'rotate=xxx' where xxx in [0, 90, 180, 270]
+
+ Units are in points
+
+
+Reference for content: Adobe PDF reference, sixth edition, version 1.7
+
+ http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
+
+ Form XObjects are discussed in section 4.9, page 355
+'''
+
+from pdfrw.objects import PdfDict, PdfArray, PdfName
+from pdfrw.pdfreader import PdfReader
+from pdfrw.errors import log
+
+class ViewInfo(object):
+    ''' Parse a view specification of the form
+
+            [docname][#option[&option...]]
+
+        into object attributes.  Recognized options are
+        'page=<int>', 'rotate=<int>' and
+        'viewrect=<a>,<b>,<c>,<d>' (four numbers, separated by
+        commas and/or whitespace).
+
+        If the text before the first '#' is itself one of the
+        recognized options, no document name is recorded and
+        that text is parsed as an option instead.
+
+        Attributes (all default to None at class level):
+            doc      -- never parsed from the uri; only set via keyword
+            docname  -- text before the first '#'
+            page     -- int
+            viewrect -- list of four floats
+            rotate   -- int
+
+        Keyword arguments override parsed values; each keyword
+        must name an existing attribute.
+    '''
+    doc = None
+    docname = None
+    page = None
+    viewrect = None
+    rotate = None
+
+    def __init__(self, pageinfo='', **kw):
+        pageinfo = pageinfo.split('#', 1)
+        if len(pageinfo) == 2:
+            # Options may be separated by either '&' or '#'
+            pageinfo[1:] = pageinfo[1].replace('&', '#').split('#')
+        # The leading item is the document name unless it is already
+        # one of the options understood by the loop below.
+        if not pageinfo[0].startswith(('page=', 'rotate=', 'viewrect=')):
+            self.docname = pageinfo.pop(0)
+        for item in pageinfo:
+            key, value = item.split('=')
+            key = key.strip()
+            # Accept both comma and whitespace separated numbers
+            value = value.replace(',', ' ').split()
+            if key in ('page', 'rotate'):
+                assert len(value) == 1
+                setattr(self, key, int(value[0]))
+            elif key == 'viewrect':
+                assert len(value) == 4
+                setattr(self, key, [float(x) for x in value])
+            else:
+                log.error('Unknown option: %s', key)
+        # items() rather than iteritems() so this also runs on Python 3
+        for key, value in kw.items():
+            assert hasattr(self, key), key
+            setattr(self, key, value)
+
+def get_rotation(rotate):
+    ''' Return clockwise rotation code:
+            0 = unrotated
+            1 = 90 degrees
+            2 = 180 degrees
+            3 = 270 degrees
+
+        rotate is anything int() accepts.  Values that cannot be
+        converted (including None), or that are not a multiple of
+        90, give 0.  The result is not reduced to the range 0..3.
+    '''
+    try:
+        rotate = int(rotate)
+    except (ValueError, TypeError):
+        return 0
+    if rotate % 90 != 0:
+        return 0
+    # Floor division keeps the code an int even under true division;
+    # rotate_point() applies bitwise tests to it.
+    return rotate // 90
+
+def rotate_point(point, rotation):
+    ''' Turn an (x, y) pair clockwise by a rotation code, i.e. a
+        number of quarter turns.  Only the two low bits of the
+        code are examined; with both bits clear the point is
+        returned unchanged.
+    '''
+    quarter_turn = rotation & 1
+    half_turn = rotation & 2
+    if quarter_turn:
+        # 90 degrees clockwise: (x, y) -> (y, -x)
+        point = (point[1], -point[0])
+    # 180 degrees: negate both coordinates
+    return (-point[0], -point[1]) if half_turn else point
+
+def rotate_rect(rect, rotation):
+    ''' Rotate the two corner points of a rectangle by the given
+        rotation code, then return the result normalized as
+        (lower-left x, lower-left y, upper-right x, upper-right y).
+    '''
+    first_corner = rotate_point(rect[:2], rotation)
+    second_corner = rotate_point(rect[2:], rotation)
+    corners = first_corner + second_corner
+    xs = corners[0], corners[2]
+    ys = corners[1], corners[3]
+    # After rotation the corners may have swapped roles; re-sort them
+    return (min(xs), min(ys), max(xs), max(ys))
+
+def getrects(inheritable, pageinfo, rotation):
+    ''' Compute the two rectangles needed to build an XObject.
+
+        inheritable supplies MediaBox and CropBox, pageinfo
+        supplies viewrect, and rotation is the page's rotation code.
+
+        Returns (mbox, cbox), each a 4-tuple.  mbox is the media
+        box as floats.  Without a viewrect, cbox is the crop box
+        (or the media box when there is no crop box); with one,
+        cbox is the view rectangle clipped to the media box.
+    '''
+    mbox = tuple(float(v) for v in inheritable.MediaBox)
+    view = pageinfo.viewrect
+    if view is None:
+        crop = inheritable.CropBox or mbox
+        return mbox, tuple(float(v) for v in crop)
+
+    # Work in the rotated frame (what the user sees), where the view
+    # rectangle is measured from the top-left corner of the media box.
+    left, bottom, right, top = rotate_rect(mbox, rotation)
+    offset_x, offset_y, width, height = view
+    clip_left = left + offset_x
+    clip_top = top - offset_y
+    clip_right = clip_left + width
+    clip_bottom = clip_top - height
+    clipped = (
+        max(left, clip_left),
+        max(bottom, clip_bottom),
+        min(right, clip_right),
+        min(top, clip_top),
+    )
+    # Rotate the clip box back into the page's own frame
+    return mbox, rotate_rect(clipped, -rotation)
+
+
+def _cache_xobj(contents, resources, mbox, bbox, rotation):
+    ''' Look up the Form XObject for (mbox, bbox, rotation) in the
+        cache dictionary kept on contents, building and storing
+        it on a miss.
+
+        A newly built XObject gets private members x, y, w, h
+        describing its (rotated) bounding box.
+    '''
+    cache = contents.xobj_cachedict
+    if cache is None:
+        # First request for this contents object: start its cache
+        cache = contents.private.xobj_cachedict = {}
+    key = (mbox, bbox, rotation)
+    xobj = cache.get(key)
+    if xobj is not None:
+        return xobj
+
+    # A bounding box equal to the media box means the whole page
+    if mbox != bbox:
+        builder = _get_subpage
+    else:
+        builder = _get_fullpage
+    xobj = PdfDict(
+        builder(contents, resources, mbox, bbox, rotation),
+        Type=PdfName.XObject,
+        Subtype=PdfName.Form,
+        FormType=1,
+        BBox=PdfArray(bbox),
+    )
+
+    if rotation:
+        # Rotate the two unit vectors to get the first four matrix entries
+        x_axis = rotate_point((1, 0), rotation)
+        y_axis = rotate_point((0, 1), rotation)
+        xobj.Matrix = PdfArray(x_axis + y_axis + (0, 0))
+        extent = rotate_rect(bbox, rotation)
+    else:
+        extent = bbox
+
+    private = xobj.private
+    private.x = extent[0]
+    private.y = extent[1]
+    private.w = extent[2] - extent[0]
+    private.h = extent[3] - extent[1]
+    cache[key] = xobj
+    return xobj
+
+def _get_fullpage(contents, resources, mbox, bbox, rotation):
+    ''' Whole-page case: copy the contents into a new dictionary
+        and attach the resources.  mbox, bbox and rotation are
+        accepted only to match _get_subpage's signature;
+        _cache_xobj fills in everything else.
+    '''
+    fullpage = PdfDict(contents, Resources=resources)
+    return fullpage
+
+def _get_subpage(contents, resources, mbox, bbox, rotation):
+    ''' Partial-page case.  Instead of copying the page contents,
+        build a tiny stream that draws the (cached) full-page
+        Form XObject, so that several views of one page can share
+        a single copy of that page.
+    '''
+    # The full page is always requested unrotated, with bbox == mbox
+    fullpage = _cache_xobj(contents, resources, mbox, mbox, 0)
+    xobjects = PdfDict(FullPage=fullpage)
+    subresources = PdfDict(XObject=xobjects)
+    return PdfDict(stream='/FullPage Do\n', Resources=subresources)
+
+def pagexobj(page, viewinfo=None, allow_compressed=True):
+    ''' pagexobj creates and returns a Form XObject for
+        a given view within a page (Defaults to entire page.)
+
+        page             -- source page; its inheritable attributes
+                            (Resources, Rotate, MediaBox, CropBox)
+                            and Contents are used
+        viewinfo         -- ViewInfo giving viewrect and rotate;
+                            None means a default ViewInfo()
+        allow_compressed -- when false, the contents dictionary
+                            must have exactly one entry
+
+        The checks below are assert statements, so a failure
+        raises AssertionError.
+    '''
+    # Create the default per call rather than sharing one instance
+    # that was built when the module was imported.
+    if viewinfo is None:
+        viewinfo = ViewInfo()
+    inheritable = page.inheritable
+    resources = inheritable.Resources
+    rotation = get_rotation(inheritable.Rotate)
+    # The view rectangle is interpreted with the page's own rotation;
+    # the extra rotation requested by the view is added afterwards.
+    mbox, bbox = getrects(inheritable, viewinfo, rotation)
+    rotation += get_rotation(viewinfo.rotate)
+    contents = page.Contents
+    # The declared length must match the stream we actually hold
+    assert int(contents.Length) == len(contents.stream)
+    if not allow_compressed:
+        # Make sure the only attribute is length
+        # All the filters must have been executed
+        assert len([x for x in contents.iteritems()]) == 1
+    return _cache_xobj(contents, resources, mbox, bbox, rotation)
+
+
+
+def docxobj(pageinfo, doc=None, allow_compressed=True):
+    ''' Build and return a Form XObject for the view described by
+        pageinfo, which is either a ViewInfo or a string from
+        which one is constructed.
+
+        The document comes from, in order of preference: the doc
+        argument (in which case pageinfo must not already carry
+        one), pageinfo.doc, or a fresh PdfReader opened on
+        pageinfo.docname.  Whichever is used is also stored on
+        pageinfo.doc.  A missing page number means the first page.
+    '''
+    view = pageinfo if isinstance(pageinfo, ViewInfo) else ViewInfo(pageinfo)
+
+    if doc is not None:
+        # An explicit document must not clash with an implicit one
+        assert view.doc is None
+        view.doc = doc
+    else:
+        doc = view.doc
+        if doc is None:
+            # Nothing supplied either way: read the file named in the view
+            doc = view.doc = PdfReader(view.docname,
+                                       decompress=not allow_compressed)
+    assert isinstance(doc, PdfReader)
+
+    # Page numbers in the view are 1-based
+    page_index = (view.page or 1) - 1
+    return pagexobj(doc.pages[page_index], view, allow_compressed)
+
+
+class CacheXObj(object):
+    ''' Remembers the PdfReader opened for each document name, so
+        that a file referenced many times is parsed only once and
+        its objects are not needlessly duplicated in the output.
+    '''
+    def __init__(self, decompress=False):
+        ''' decompress -- pass true if the Form XObjects must be
+            decompressed.  Readers are then opened with
+            decompress=True, and load() builds XObjects with
+            allow_compressed=False.
+        '''
+        self.decompress = decompress
+        self.cached_pdfs = {}
+
+    def load(self, sourcename):
+        ''' Return the Form XObject described by the uri
+            sourcename, opening its document only if that document
+            name has not been seen before.
+        '''
+        view = ViewInfo(sourcename)
+        name = view.docname
+        reader = self.cached_pdfs.get(name)
+        if reader is None:
+            reader = PdfReader(name, decompress=self.decompress)
+            self.cached_pdfs[name] = reader
+        return docxobj(view, reader, allow_compressed=not self.decompress)