diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/4up.py | 66 | ||||
-rwxr-xr-x | examples/alter.py | 1 | ||||
-rwxr-xr-x | examples/booklet.py | 60 | ||||
-rwxr-xr-x | examples/cat.py | 35 | ||||
-rwxr-xr-x | examples/extract.py | 27 | ||||
-rw-r--r-- | examples/find_pdfrw.py | 33 | ||||
-rwxr-xr-x | examples/metadata.py | 39 | ||||
-rwxr-xr-x | examples/poster.py | 70 | ||||
-rwxr-xr-x | examples/print_two.py | 44 | ||||
-rwxr-xr-x | examples/rl1/4up.py | 1 | ||||
-rwxr-xr-x | examples/rl1/booklet.py | 3 | ||||
-rw-r--r-- | examples/rl1/find_pdfrw.py | 33 | ||||
-rwxr-xr-x | examples/rl1/platypus_pdf_template.py | 30 | ||||
-rwxr-xr-x | examples/rl1/subset.py | 7 | ||||
-rwxr-xr-x | examples/rl2/copy.py | 2 | ||||
-rw-r--r-- | examples/rl2/decodegraphics.py | 114 | ||||
-rw-r--r-- | examples/rl2/find_pdfrw.py | 33 | ||||
-rwxr-xr-x | examples/rotate.py | 4 | ||||
-rwxr-xr-x | examples/subset.py | 1 | ||||
-rwxr-xr-x | examples/unspread.py | 32 | ||||
-rwxr-xr-x | examples/watermark.py | 123 |
21 files changed, 306 insertions, 452 deletions
diff --git a/examples/4up.py b/examples/4up.py index 491af14..ad2bd3b 100755 --- a/examples/4up.py +++ b/examples/4up.py @@ -1,51 +1,33 @@ #!/usr/bin/env python ''' -usage: 4up.py my.pdf firstpage lastpage - -Creates 4up.my.pdf +usage: 4up.py my.pdf +Creates 4up.my.pdf with a single output page for every +4 input pages. ''' import sys import os -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfArray -from pdfrw.buildxobj import pagexobj - -def get4(allpages): - # Pull a maximum of 4 pages off the list - pages = [pagexobj(x) for x in allpages[:4]] - del allpages[:4] - - x_max = max(page.BBox[2] for page in pages) - y_max = max(page.BBox[3] for page in pages) - - stream = [] - xobjdict = PdfDict() - for index, page in enumerate(pages): - x = x_max * (index & 1) / 2.0 - y = y_max * (index <= 1) / 2.0 - index = '/P%s' % index - stream.append('q 0.5 0 0 0.5 %s %s cm %s Do Q\n' % (x, y, index)) - xobjdict[index] = page - - return PdfDict( - Type = PdfName.Page, - Contents = PdfDict(stream=''.join(stream)), - MediaBox = PdfArray([0, 0, x_max, y_max]), - Resources = PdfDict(XObject = xobjdict), - ) - -def go(inpfn, outfn): - pages = PdfReader(inpfn).pages - writer = PdfWriter() - while pages: - writer.addpage(get4(pages)) - writer.write(outfn) - -if __name__ == '__main__': - inpfn, = sys.argv[1:] - outfn = '4up.' + os.path.basename(inpfn) - go(inpfn, outfn) +from pdfrw import PdfReader, PdfWriter, PageMerge + + +def get4(srcpages): + scale = 0.5 + srcpages = PageMerge() + srcpages + x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:]) + for i, page in enumerate(srcpages): + page.scale(scale) + page.x = x_increment if i & 1 else 0 + page.y = 0 if i & 2 else y_increment + return srcpages.render() + + +inpfn, = sys.argv[1:] +outfn = '4up.' + os.path.basename(inpfn) +pages = PdfReader(inpfn).pages +writer = PdfWriter() +for index in range(0, len(pages), 4): + writer.addpage(get4(pages[index:index + 4])) +writer.write(outfn) diff --git a/examples/alter.py b/examples/alter.py index 1c6d4e1..45b9c76 100755 --- a/examples/alter.py +++ b/examples/alter.py @@ -12,7 +12,6 @@ Demonstrates making a slight alteration to a preexisting PDF file. import sys import os -import find_pdfrw from pdfrw import PdfReader, PdfWriter inpfn, = sys.argv[1:] diff --git a/examples/booklet.py b/examples/booklet.py index 0b3be74..4758b08 100755 --- a/examples/booklet.py +++ b/examples/booklet.py @@ -5,61 +5,35 @@ usage: booklet.py my.pdf Creates booklet.my.pdf -Pages organized in a form suitable for booklet printing. - +Pages organized in a form suitable for booklet printing, e.g. +to print 4 8.5x11 pages using a single 11x17 sheet (double-sided). ''' import sys import os -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, PdfDict, PdfArray, PdfName, IndirectPdfDict -from pdfrw.buildxobj import pagexobj - -def fixpage(*pages): - pages = [pagexobj(x) for x in pages] +from pdfrw import PdfReader, PdfWriter, PageMerge - class PageStuff(tuple): - pass - x = y = 0 - for i, page in enumerate(pages): - index = '/P%s' % i - shift_right = x and '1 0 0 1 %s 0 cm ' % x or '' - stuff = PageStuff((index, page)) - stuff.stream = 'q %s%s Do Q\n' % (shift_right, index) - x += page.BBox[2] - y = max(y, page.BBox[3]) - pages[i] = stuff - - # Multiple copies of first page used as a placeholder to - # get blank page on back. - for p1, p2 in zip(pages, pages[1:]): - if p1[1] is p2[1]: - pages.remove(p1) +def fixpage(*pages): + result = PageMerge() + (x for x in pages if x is not None) + result[-1].x += result[0].w + return result.render() - return IndirectPdfDict( - Type = PdfName.Page, - Contents = PdfDict(stream=''.join(page.stream for page in pages)), - MediaBox = PdfArray([0, 0, x, y]), - Resources = PdfDict( - XObject = PdfDict(pages), - ), - ) inpfn, = sys.argv[1:] outfn = 'booklet.' + os.path.basename(inpfn) -pages = PdfReader(inpfn).pages +ipages = PdfReader(inpfn).pages -# Use page1 as a marker to print a blank at the end -if len(pages) & 1: - pages.append(pages[0]) +# Make sure we have an even number +if len(ipages) & 1: + ipages.append(None) -bigpages = [] -while len(pages) > 2: - bigpages.append(fixpage(pages.pop(), pages.pop(0))) - bigpages.append(fixpage(pages.pop(0), pages.pop())) +opages = [] +while len(ipages) > 2: + opages.append(fixpage(ipages.pop(), ipages.pop(0))) + opages.append(fixpage(ipages.pop(0), ipages.pop())) -bigpages += pages +opages += ipages -PdfWriter().addpages(bigpages).write(outfn) +PdfWriter().addpages(opages).write(outfn) diff --git a/examples/cat.py b/examples/cat.py new file mode 100755 index 0000000..86cf643 --- /dev/null +++ b/examples/cat.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +''' +usage: cat.py <first.pdf> [<next.pdf> ...] + +Creates cat.<first.pdf> + +This file demonstrates two features: + +1) Concatenating multiple input PDFs. + +2) adding metadata to the PDF. + +''' + +import sys +import os + +from pdfrw import PdfReader, PdfWriter, IndirectPdfDict + +inputs = sys.argv[1:] +assert inputs +outfn = 'cat.' + os.path.basename(inputs[0]) + +writer = PdfWriter() +for inpfn in inputs: + writer.addpages(PdfReader(inpfn).pages) + +writer.trailer.Info = IndirectPdfDict( + Title='your title goes here', + Author='your name goes here', + Subject='what is it all about?', + Creator='some script goes here', +) +writer.write(outfn) diff --git a/examples/extract.py b/examples/extract.py new file mode 100755 index 0000000..3756b4f --- /dev/null +++ b/examples/extract.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +''' +usage: extract.py <some.pdf> + +Locates Form XObjects and Image XObjects within the PDF, +and creates a new PDF containing these -- one per page. + +Resulting file will be named extract.<some.pdf> + +''' + +import sys +import os + +from pdfrw import PdfReader, PdfWriter +from pdfrw.findobjs import page_per_xobj + + +inpfn, = sys.argv[1:] +outfn = 'extract.' + os.path.basename(inpfn) +pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72)) +if not pages: + raise IndexError("No XObjects found") +writer = PdfWriter() +writer.addpages(pages) +writer.write(outfn) diff --git a/examples/find_pdfrw.py b/examples/find_pdfrw.py deleted file mode 100644 index f69d412..0000000 --- a/examples/find_pdfrw.py +++ /dev/null @@ -1,33 +0,0 @@ -''' - find_xxx.py -- Find the place in the tree where xxx lives. - - Ways to use: - 1) Make a copy, change 'xxx' in package to be your name; or - 2) Under Linux, just ln -s to where this is in the right tree - - Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting -''' - -import sys -import os - -myname = __name__[5:] # remove 'find_' -myname = os.path.join(myname, '__init__.py') - -def trypath(newpath): - path = None - while path != newpath: - path = newpath - if os.path.exists(os.path.join(path, myname)): - return path - newpath = os.path.dirname(path) - -root = trypath(__file__) or trypath(os.path.realpath(__file__)) - -if root is None: - print - print 'Warning: %s: Could not find path to development package %s' % (__file__, myname) - print ' The import will either fail or will use system-installed libraries' - print -elif root not in sys.path: - sys.path.append(root) diff --git a/examples/metadata.py b/examples/metadata.py deleted file mode 100755 index 9c69108..0000000 --- a/examples/metadata.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -''' -usage: metadata.py <first.pdf> [<next.pdf> ...] - -Creates output.pdf - -This file demonstrates two features: - -1) Concatenating multiple input PDFs. - -2) adding metadata to the PDF. - -If you do not need to add metadata, look at subset.py, which -has a simpler interface to PdfWriter. - -''' - -import sys -import os - -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, IndirectPdfDict - -inputs = sys.argv[1:] -assert inputs -outfn = 'output.pdf' - -writer = PdfWriter() -for inpfn in inputs: - writer.addpages(PdfReader(inpfn.pages) - -writer.trailer.Info = IndirectPdfDict( - Title = 'your title goes here', - Author = 'your name goes here', - Subject = 'what is it all about?', - Creator = 'some script goes here', -) -writer.write(outfn) diff --git a/examples/poster.py b/examples/poster.py index ee58282..7f1c1c2 100755 --- a/examples/poster.py +++ b/examples/poster.py @@ -7,51 +7,37 @@ Shows how to change the size on a PDF. Motivation: -My daughter needed to create a 48" x 36" poster, but her Mac version of Powerpoint -only wanted to output 8.5" x 11" for some reason. +My daughter needed to create a 48" x 36" poster, but her Mac +version of Powerpoint only wanted to output 8.5" x 11" for +some reason. + +So she did an 8.5x11" output with 0.5" margin all around +(actual size of useful area 7.5x10") and we scaled it +up by 4.8. + +We also copy the Info dict to the new PDF. ''' import sys import os -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfArray, IndirectPdfDict -from pdfrw.buildxobj import pagexobj - -def adjust(page): - page = pagexobj(page) - assert page.BBox == [0, 0, 11 * 72, int(8.5 * 72)], page.BBox - margin = 72 // 2 - old_x, old_y = page.BBox[2] - 2 * margin, page.BBox[3] - 2 * margin - - new_x, new_y = 48 * 72, 36 * 72 - ratio = 1.0 * new_x / old_x - assert ratio == 1.0 * new_y / old_y - - index = '/BasePage' - x = -margin * ratio - y = -margin * ratio - stream = 'q %0.2f 0 0 %0.2f %s %s cm %s Do Q\n' % (ratio, ratio, x, y, index) - xobjdict = PdfDict() - xobjdict[index] = page - - return PdfDict( - Type = PdfName.Page, - Contents = PdfDict(stream=stream), - MediaBox = PdfArray([0, 0, new_x, new_y]), - Resources = PdfDict(XObject = xobjdict), - ) - -def go(inpfn, outfn): - reader = PdfReader(inpfn) - page, = reader.pages - writer = PdfWriter() - writer.addpage(adjust(page)) - writer.trailer.Info = IndirectPdfDict(reader.Info) - writer.write(outfn) - -if __name__ == '__main__': - inpfn, = sys.argv[1:] - outfn = 'poster.' + os.path.basename(inpfn) - go(inpfn, outfn) +from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict + + +def adjust(page, margin=36, scale=4.8): + info = PageMerge().add(page) + x1, y1, x2, y2 = info.xobj_box + viewrect = (margin, margin, x2 - x1 - 2 * margin, y2 - y1 - 2 * margin) + page = PageMerge().add(page, viewrect=viewrect) + page[0].scale(scale) + return page.render() + + +inpfn, = sys.argv[1:] +outfn = 'poster.' + os.path.basename(inpfn) +reader = PdfReader(inpfn) +writer = PdfWriter() +writer.addpage(adjust(reader.pages[0])) +writer.trailer.Info = IndirectPdfDict(reader.Info or {}) +writer.write(outfn) diff --git a/examples/print_two.py b/examples/print_two.py index d168f68..c54eaee 100755 --- a/examples/print_two.py +++ b/examples/print_two.py @@ -7,52 +7,26 @@ Creates print_two.my.pdf This is only useful when you can cut down sheets of paper to make two small documents. Works for double-sided only right now. - ''' import sys import os -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, PdfArray, IndirectPdfDict +from pdfrw import PdfReader, PdfWriter, PageMerge + def fixpage(page, count=[0]): count[0] += 1 - evenpage = not (count[0] & 1) - - # For demo purposes, just go with the MediaBox and toast the others - box = [float(x) for x in page.MediaBox] - assert box[0] == box[1] == 0, "demo won't work on this PDF" - - for key, value in sorted(page.iteritems()): - if 'box' in key.lower(): - del page[key] - - startsize = tuple(box[2:]) - finalsize = box[3], 2 * box[2] - page.MediaBox = PdfArray((0, 0) + finalsize) - page.Rotate = (int(page.Rotate or 0) + 90) % 360 - - contents = page.Contents - if contents is None: - return page - contents = isinstance(contents, dict) and [contents] or contents - - prefix = '0 1 -1 0 %s %s cm\n' % (finalsize[0], 0) - if evenpage: - prefix = '1 0 0 1 %s %s cm\n' % (0, finalsize[1]/2) + prefix - first_prefix = 'q\n-1 0 0 -1 %s %s cm\n' % finalsize + prefix - second_prefix = '\nQ\n' + prefix - first_prefix = IndirectPdfDict(stream=first_prefix) - second_prefix = IndirectPdfDict(stream=second_prefix) - contents = PdfArray(([second_prefix] + contents) * 2) - contents[0] = first_prefix - page.Contents = contents - return page + oddpage = (count[0] & 1) + + result = PageMerge() + for rotation in (180 + 180 * oddpage, 180 * oddpage): + result.add(page, rotate=rotation) + result[1].x = result[0].w + return result.render() inpfn, = sys.argv[1:] outfn = 'print_two.' + os.path.basename(inpfn) pages = PdfReader(inpfn).pages - PdfWriter().addpages(fixpage(x) for x in pages).write(outfn) diff --git a/examples/rl1/4up.py b/examples/rl1/4up.py index 385103a..49f766e 100755 --- a/examples/rl1/4up.py +++ b/examples/rl1/4up.py @@ -15,7 +15,6 @@ import os from reportlab.pdfgen.canvas import Canvas -import find_pdfrw from pdfrw import PdfReader from pdfrw.buildxobj import pagexobj from pdfrw.toreportlab import makerl diff --git a/examples/rl1/booklet.py b/examples/rl1/booklet.py index 23f8a72..e7764a0 100755 --- a/examples/rl1/booklet.py +++ b/examples/rl1/booklet.py @@ -15,7 +15,6 @@ import os from reportlab.pdfgen.canvas import Canvas -import find_pdfrw from pdfrw import PdfReader from pdfrw.buildxobj import pagexobj from pdfrw.toreportlab import makerl @@ -43,7 +42,7 @@ def make_pdf(outfn, xobjpairs): x += xobj.BBox[2] y = max(y, xobj.BBox[3]) - canvas.setPageSize((x,y)) + canvas.setPageSize((x, y)) # Handle blank back page if len(xobjlist) > 1 and xobjlist[0] == xobjlist[-1]: diff --git a/examples/rl1/find_pdfrw.py b/examples/rl1/find_pdfrw.py deleted file mode 100644 index f69d412..0000000 --- a/examples/rl1/find_pdfrw.py +++ /dev/null @@ -1,33 +0,0 @@ -''' - find_xxx.py -- Find the place in the tree where xxx lives. - - Ways to use: - 1) Make a copy, change 'xxx' in package to be your name; or - 2) Under Linux, just ln -s to where this is in the right tree - - Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting -''' - -import sys -import os - -myname = __name__[5:] # remove 'find_' -myname = os.path.join(myname, '__init__.py') - -def trypath(newpath): - path = None - while path != newpath: - path = newpath - if os.path.exists(os.path.join(path, myname)): - return path - newpath = os.path.dirname(path) - -root = trypath(__file__) or trypath(os.path.realpath(__file__)) - -if root is None: - print - print 'Warning: %s: Could not find path to development package %s' % (__file__, myname) - print ' The import will either fail or will use system-installed libraries' - print -elif root not in sys.path: - sys.path.append(root) diff --git a/examples/rl1/platypus_pdf_template.py b/examples/rl1/platypus_pdf_template.py index 8c85794..7e4769a 100755 --- a/examples/rl1/platypus_pdf_template.py +++ b/examples/rl1/platypus_pdf_template.py @@ -1,17 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ -usage: platypus_pdf_template.py output.pdf pdf_file_to_use_as_template.pdf +usage: platypus_pdf_template.py source.pdf -Example of using pdfrw to use a pdf (page one) as the background for all -other pages together with platypus. +Creates platypus.source.pdf -There is a table of contents in this example for completeness sake. +Example of using pdfrw to use page 1 of a source PDF as the background +for other pages programmatically generated with Platypus. Contributed by user asannes """ import sys +import os from reportlab.platypus import PageTemplate, BaseDocTemplate, Frame from reportlab.platypus import NextPageTemplate, Paragraph, PageBreak @@ -21,7 +22,6 @@ from reportlab.rl_config import defaultPageSize from reportlab.lib.units import inch from reportlab.graphics import renderPDF -import find_pdfrw from pdfrw import PdfReader from pdfrw.buildxobj import pagexobj from pdfrw.toreportlab import makerl @@ -29,6 +29,7 @@ from pdfrw.toreportlab import makerl PAGE_WIDTH = defaultPageSize[0] PAGE_HEIGHT = defaultPageSize[1] + class MyTemplate(PageTemplate): """The kernel of this example, where we use pdfrw to fill in the background of a page before writing to it. This could be used to fill @@ -57,6 +58,7 @@ class MyTemplate(PageTemplate): canvas.doForm(rl_obj) canvas.restoreState() + class MyDocTemplate(BaseDocTemplate): """Used to apply heading to table of contents.""" @@ -70,20 +72,22 @@ class MyDocTemplate(BaseDocTemplate): self.canv.bookmarkPage(key) self.notify('TOCEntry', [1, text, self.page, key]) + def create_toc(): """Creates the table of contents""" table_of_contents = TableOfContents() table_of_contents.dotsMinLevel = 0 - header1 = ParagraphStyle(name = 'Heading1', fontSize = 16, leading = 16) - header2 = ParagraphStyle(name = 'Heading2', fontSize = 14, leading = 14) + header1 = ParagraphStyle(name='Heading1', fontSize=16, leading=16) + header2 = ParagraphStyle(name='Heading2', fontSize=14, leading=14) table_of_contents.levelStyles = [header1, header2] return [table_of_contents, PageBreak()] + def create_pdf(filename, pdf_template_filename): """Create the pdf, with all the contents""" - pdf_report = open(filename, "w") + pdf_report = open(filename, "wb") document = MyDocTemplate(pdf_report) - templates = [ MyTemplate(pdf_template_filename, name='background') ] + templates = [MyTemplate(pdf_template_filename, name='background')] document.addPageTemplates(templates) styles = getSampleStyleSheet() @@ -99,8 +103,6 @@ def create_pdf(filename, pdf_template_filename): if __name__ == '__main__': - try: - output, template = sys.argv[1:] - create_pdf(output, template) - except ValueError: - print "Usage: %s <output> <template>" % (sys.argv[0]) + template, = sys.argv[1:] + output = 'platypus_pdf_template.' + os.path.basename(template) + create_pdf(output, template) diff --git a/examples/rl1/subset.py b/examples/rl1/subset.py index 8b2bf08..c05056c 100755 --- a/examples/rl1/subset.py +++ b/examples/rl1/subset.py @@ -17,7 +17,6 @@ import os from reportlab.pdfgen.canvas import Canvas -import find_pdfrw from pdfrw import PdfReader from pdfrw.buildxobj import pagexobj from pdfrw.toreportlab import makerl @@ -25,14 +24,14 @@ from pdfrw.toreportlab import makerl def go(inpfn, firstpage, lastpage): firstpage, lastpage = int(firstpage), int(lastpage) - outfn = 'subset_%s_to_%s.%s' % (firstpage, lastpage, os.path.basename(inpfn)) + outfn = 'subset.' + os.path.basename(inpfn) pages = PdfReader(inpfn).pages - pages = [pagexobj(x) for x in pages[firstpage-1:lastpage]] + pages = [pagexobj(x) for x in pages[firstpage - 1:lastpage]] canvas = Canvas(outfn) for page in pages: - canvas.setPageSize(tuple(page.BBox[2:])) + canvas.setPageSize((page.BBox[2], page.BBox[3])) canvas.doForm(makerl(canvas, page)) canvas.showPage() diff --git a/examples/rl2/copy.py b/examples/rl2/copy.py index 20dad71..66fe0c6 100755 --- a/examples/rl2/copy.py +++ b/examples/rl2/copy.py @@ -20,7 +20,7 @@ from pdfrw import PdfReader, PdfWriter, PdfArray inpfn, = sys.argv[1:] outfn = 'copy.' + os.path.basename(inpfn) -pages = PdfReader(inpfn).pages +pages = PdfReader(inpfn, decompress=True).pages canvas = Canvas(outfn, pageCompression=0) for page in pages: diff --git a/examples/rl2/decodegraphics.py b/examples/rl2/decodegraphics.py index 9fa5a05..e2f3a9f 100644 --- a/examples/rl2/decodegraphics.py +++ b/examples/rl2/decodegraphics.py @@ -1,4 +1,4 @@ -# A part of pdfrw (pdfrw.googlecode.com) +# A part of pdfrw (https://github.com/pmaupin/pdfrw) # Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas # MIT license -- See LICENSE.txt for details @@ -13,13 +13,13 @@ Better to use Form XObjects for most things (see the example in rl1). ''' from inspect import getargspec -import find_pdfrw from pdfrw import PdfTokens -from pdfrw.pdfobjects import PdfString +from pdfrw.objects import PdfString ############################################################################# # Graphics parsing + def parse_array(self, token='[', params=None): mylist = [] for token in self.tokens: @@ -28,105 +28,134 @@ def parse_array(self, token='[', params=None): mylist.append(token) self.params.append(mylist) + def parse_savestate(self, token='q', params=''): self.canv.saveState() + def parse_restorestate(self, token='Q', params=''): self.canv.restoreState() + def parse_transform(self, token='cm', params='ffffff'): self.canv.transform(*params) + def parse_linewidth(self, token='w', params='f'): self.canv.setLineWidth(*params) + def parse_linecap(self, token='J', params='i'): self.canv.setLineCap(*params) + def parse_linejoin(self, token='j', params='i'): self.canv.setLineJoin(*params) + def parse_miterlimit(self, token='M', params='f'): self.canv.setMiterLimit(*params) + def parse_dash(self, token='d', params='as'): # Array, string self.canv.setDash(*params) + def parse_intent(self, token='ri', params='n'): # TODO: add logging pass + def parse_flatness(self, token='i', params='i'): # TODO: add logging pass + def parse_gstate(self, token='gs', params='n'): # TODO: add logging # Could parse stuff we care about from here later pass + def parse_move(self, token='m', params='ff'): if self.gpath is None: self.gpath = self.canv.beginPath() self.gpath.moveTo(*params) self.current_point = params + def parse_line(self, token='l', params='ff'): self.gpath.lineTo(*params) self.current_point = params + def parse_curve(self, token='c', params='ffffff'): self.gpath.curveTo(*params) self.current_point = params[-2:] + def parse_curve1(self, token='v', params='ffff'): parse_curve(self, token, tuple(self.current_point) + tuple(params)) + def parse_curve2(self, token='y', params='ffff'): parse_curve(self, token, tuple(params) + tuple(params[-2:])) + def parse_close(self, token='h', params=''): self.gpath.close() + def parse_rect(self, token='re', params='ffff'): if self.gpath is None: self.gpath = self.canv.beginPath() self.gpath.rect(*params) self.current_point = params[-2:] + def parse_stroke(self, token='S', params=''): finish_path(self, 1, 0, 0) + def parse_close_stroke(self, token='s', params=''): self.gpath.close() finish_path(self, 1, 0, 0) + def parse_fill(self, token='f', params=''): finish_path(self, 0, 1, 1) + def parse_fill_compat(self, token='F', params=''): finish_path(self, 0, 1, 1) + def parse_fill_even_odd(self, token='f*', params=''): finish_path(self, 0, 1, 0) + def parse_fill_stroke_even_odd(self, token='B*', params=''): finish_path(self, 1, 1, 0) + def parse_fill_stroke(self, token='B', params=''): finish_path(self, 1, 1, 1) + def parse_close_fill_stroke_even_odd(self, token='b*', params=''): self.gpath.close() finish_path(self, 1, 1, 0) + def parse_close_fill_stroke(self, token='b', params=''): self.gpath.close() finish_path(self, 1, 1, 1) + def parse_nop(self, token='n', params=''): finish_path(self, 0, 0, 0) + def finish_path(self, stroke, fill, fillmode): if self.gpath is not None: canv = self.canv @@ -135,39 +164,49 @@ def finish_path(self, stroke, fill, fillmode): canv._fillMode = oldmode self.gpath = None + def parse_clip_path(self, token='W', params=''): # TODO: add logging pass + def parse_clip_path_even_odd(self, token='W*', params=''): # TODO: add logging pass + def parse_stroke_gray(self, token='G', params='f'): self.canv.setStrokeGray(*params) + def parse_fill_gray(self, token='g', params='f'): self.canv.setFillGray(*params) + def parse_stroke_rgb(self, token='RG', params='fff'): self.canv.setStrokeColorRGB(*params) + def parse_fill_rgb(self, token='rg', params='fff'): self.canv.setFillColorRGB(*params) + def parse_stroke_cmyk(self, token='K', params='ffff'): self.canv.setStrokeColorCMYK(*params) + def parse_fill_cmyk(self, token='k', params='ffff'): self.canv.setFillColorCMYK(*params) ############################################################################# # Text parsing + def parse_begin_text(self, token='BT', params=''): assert self.tpath is None self.tpath = self.canv.beginText() + def parse_text_transform(self, token='Tm', params='ffffff'): path = self.tpath @@ -177,20 +216,23 @@ def parse_text_transform(self, token='Tm', params='ffffff'): except AttributeError: pass else: - if code[-1] == '1 0 0 1 0 0 Tm': + if code[-1] == '1 0 0 1 0 0 Tm': code.pop() path.setTextTransform(*params) + def parse_setfont(self, token='Tf', params='nf'): fontinfo = self.fontdict[params[0]] self.tpath._setFont(fontinfo.name, params[1]) self.curfont = fontinfo + def parse_text_out(self, token='Tj', params='t'): text = params[0].decode(self.curfont.remap, self.curfont.twobyte) self.tpath.textOut(text) + def parse_TJ(self, token='TJ', params='a'): remap = self.curfont.remap twobyte = self.curfont.twobyte @@ -204,41 +246,52 @@ def parse_TJ(self, token='TJ', params='a'): text = ''.join(result) self.tpath.textOut(text) + def parse_end_text(self, token='ET', params=''): assert self.tpath is not None self.canv.drawText(self.tpath) - self.tpath=None + self.tpath = None + def parse_move_cursor(self, token='Td', params='ff'): self.tpath.moveCursor(params[0], -params[1]) + def parse_set_leading(self, token='TL', params='f'): self.tpath.setLeading(*params) + def parse_text_line(self, token='T*', params=''): self.tpath.textLine() + def parse_set_char_space(self, token='Tc', params='f'): self.tpath.setCharSpace(*params) + def parse_set_word_space(self, token='Tw', params='f'): self.tpath.setWordSpace(*params) + def parse_set_hscale(self, token='Tz', params='f'): self.tpath.setHorizScale(params[0] - 100) + def parse_set_rise(self, token='Ts', params='f'): self.tpath.setRise(*params) + def parse_xobject(self, token='Do', params='n'): # TODO: Need to do this pass + class FontInfo(object): ''' Pretty basic -- needs a lot of work to work right for all fonts ''' lookup = { - 'BitstreamVeraSans' : 'Helvetica', # WRONG -- have to learn about font stuff... + # WRONG -- have to learn about font stuff... + 'BitstreamVeraSans': 'Helvetica', } def __init__(self, source): @@ -254,16 +307,19 @@ class FontInfo(object): assert not len(info) & 1 info2 = [] for x in info: - assert x[0] == '<' and x[-1] == '>' and len(x) in (4,6), x + assert x[0] == '<' and x[-1] == '>' and len(x) in (4, 6), x i = int(x[1:-1], 16) info2.append(i) - self.remap = dict((x,chr(y)) for (x,y) in zip(info2[::2], info2[1::2])).get + self.remap = dict((x, chr(y)) for (x, y) in + zip(info2[::2], info2[1::2])).get self.twobyte = len(info[0]) > 4 ############################################################################# # Control structures + def findparsefuncs(): + def checkname(n): assert n.startswith('/') return n @@ -276,8 +332,10 @@ def findparsefuncs(): assert isinstance(t, PdfString) return t - fixparam = dict(f=float, i=int, n=checkname, a=checkarray, s=str, t=checktext) + fixparam = dict(f=float, i=int, n=checkname, a=checkarray, + s=str, t=checktext) fixcache = {} + def fixlist(params): try: result = fixcache[params] @@ -288,12 +346,12 @@ def findparsefuncs(): dispatch = {} expected_args = 'self token params'.split() - for key, func in globals().iteritems(): + for key, func in globals().items(): if key.startswith('parse_'): args, varargs, keywords, defaults = getargspec(func) - assert args == expected_args and varargs is None \ - and keywords is None and len(defaults) == 2, \ - (key, args, varargs, keywords, defaults) + assert (args == expected_args and varargs is None and + keywords is None and len(defaults) == 2), ( + key, args, varargs, keywords, defaults) token, params = defaults if params is not None: params = fixlist(params) @@ -301,6 +359,7 @@ def findparsefuncs(): assert dispatch.setdefault(token, value) is value, repr(token) return dispatch + class _ParseClass(object): dispatch = findparsefuncs() @@ -309,14 +368,16 @@ class _ParseClass(object): self = cls() contents = page.Contents if contents.Filter is not None: - raise SystemExit('Cannot parse graphics -- page encoded with %s' % contents.Filter) + raise SystemExit('Cannot parse graphics -- page encoded with %s' + % contents.Filter) dispatch = cls.dispatch.get self.tokens = tokens = iter(PdfTokens(contents.stream)) self.params = params = [] self.canv = canvas self.gpath = None self.tpath = None - self.fontdict = dict((x,FontInfo(y)) for (x, y) in page.Resources.Font.iteritems()) + self.fontdict = dict((x, FontInfo(y)) for + (x, y) in page.Resources.Font.iteritems()) for token in self.tokens: info = dispatch(token) @@ -330,35 +391,40 @@ class _ParseClass(object): delta = len(params) - len(paraminfo) if delta: if delta < 0: - print 'Operator %s expected %s parameters, got %s' % (token, len(paraminfo), params) + print ('Operator %s expected %s parameters, got %s' % + (token, len(paraminfo), params)) params[:] = [] continue else: - print "Unparsed parameters/commands:", params[:delta] + print ("Unparsed parameters/commands: %s" % params[:delta]) del params[:delta] paraminfo = zip(paraminfo, params) try: - params[:] = [x(y) for (x,y) in paraminfo] + params[:] = [x(y) for (x, y) in paraminfo] except: - for i, (x,y) in enumerate(paraminfo): + for i, (x, y) in enumerate(paraminfo): try: x(y) except: - raise # For now + raise # For now continue func(self, token, params) params[:] = [] -def debugparser(undisturbed = set('parse_array'.split())): + +def debugparser(undisturbed=set('parse_array'.split())): def debugdispatch(): def getvalue(oldval): name = oldval[0].__name__ + def myfunc(self, token, params): - print '%s called %s(%s)' % (token, name, ', '.join(str(x) for x in params)) + print ('%s called %s(%s)' % (token, name, + ', '.join(str(x) for x in params))) if name in undisturbed: myfunc = oldval[0] return myfunc, oldval[1] - return dict((x, getvalue(y)) for (x,y) in _ParseClass.dispatch.iteritems()) + return dict((x, getvalue(y)) + for (x, y) in _ParseClass.dispatch.iteritems()) class _DebugParse(_ParseClass): dispatch = debugdispatch() @@ -374,5 +440,5 @@ if __name__ == '__main__': fname, = sys.argv[1:] pdf = PdfReader(fname) for i, page in enumerate(pdf.pages): - print '\nPage %s ------------------------------------' % i + print ('\nPage %s ------------------------------------' % i) parse(page) diff --git a/examples/rl2/find_pdfrw.py b/examples/rl2/find_pdfrw.py deleted file mode 100644 index f69d412..0000000 --- a/examples/rl2/find_pdfrw.py +++ /dev/null @@ -1,33 +0,0 @@ -''' - find_xxx.py -- Find the place in the tree where xxx lives. - - Ways to use: - 1) Make a copy, change 'xxx' in package to be your name; or - 2) Under Linux, just ln -s to where this is in the right tree - - Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting -''' - -import sys -import os - -myname = __name__[5:] # remove 'find_' -myname = os.path.join(myname, '__init__.py') - -def trypath(newpath): - path = None - while path != newpath: - path = newpath - if os.path.exists(os.path.join(path, myname)): - return path - newpath = os.path.dirname(path) - -root = trypath(__file__) or trypath(os.path.realpath(__file__)) - -if root is None: - print - print 'Warning: %s: Could not find path to development package %s' % (__file__, myname) - print ' The import will either fail or will use system-installed libraries' - print -elif root not in sys.path: - sys.path.append(root) diff --git a/examples/rotate.py b/examples/rotate.py index 4277940..8b10d05 100755 --- a/examples/rotate.py +++ b/examples/rotate.py @@ -13,7 +13,6 @@ Creates rotate.my.pdf with selected pages rotated. Rotates all by default. import sys import os -import find_pdfrw from pdfrw import PdfReader, PdfWriter inpfn = sys.argv[1] @@ -34,7 +33,8 @@ if not ranges: for onerange in ranges: onerange = (onerange + onerange[-1:])[:2] for pagenum in range(onerange[0]-1, onerange[1]): - pages[pagenum].Rotate = (int(pages[pagenum].inheritable.Rotate or 0) + rotate) % 360 + pages[pagenum].Rotate = (int(pages[pagenum].inheritable.Rotate or + 0) + rotate) % 360 outdata = PdfWriter() outdata.trailer = trailer diff --git a/examples/subset.py b/examples/subset.py index e7f8ec6..30a577a 100755 --- a/examples/subset.py +++ b/examples/subset.py @@ -11,7 +11,6 @@ Creates subset.my.pdf import sys import os -import find_pdfrw from pdfrw import PdfReader, PdfWriter inpfn = sys.argv[1] diff --git a/examples/unspread.py b/examples/unspread.py new file mode 100755 index 0000000..4b3bc5d --- /dev/null +++ b/examples/unspread.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +''' +usage: unspread.py my.pdf + +Creates unspread.my.pdf + +Chops each page in half, e.g. if a source were +created in booklet form, you could extract individual +pages. +''' + +import sys +import os + +from pdfrw import PdfReader, PdfWriter, PageMerge + + +def splitpage(src): + ''' Split a page into two (left and right) + ''' + # Yield a result for each half of the page + for x_pos in (0, 0.5): + yield PageMerge().add(src, viewrect=(x_pos, 0, 0.5, 1)).render() + + +inpfn, = sys.argv[1:] +outfn = 'unspread.' + os.path.basename(inpfn) +writer = PdfWriter() +for page in PdfReader(inpfn).pages: + writer.addpages(splitpage(page)) +writer.write(outfn) diff --git a/examples/watermark.py b/examples/watermark.py index a4d933c..96b686b 100755 --- a/examples/watermark.py +++ b/examples/watermark.py @@ -3,112 +3,31 @@ ''' Simple example of watermarking using form xobjects (pdfrw). -usage: watermark.py my.pdf single_page.pdf +usage: watermark.py [-u] my.pdf single_page.pdf Creates watermark.my.pdf, with every page overlaid with -first page from single_page.pdf +first page from single_page.pdf. If -u is selected, watermark +will be placed underneath page (painted first). + +NB: At one point, this example was extremely complicated, with + multiple options. That only led to errors in implementation, + so it has been re-simplified in order to show basic principles + of the library operation and to match the other examples better. ''' import sys import os -import find_pdfrw -from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, IndirectPdfDict, PdfArray -from pdfrw.buildxobj import pagexobj - -def fixpage(page, watermark): - - # Find the page's resource dictionary. Create if none - resources = page.inheritable.Resources - if resources is None: - resources = page.Resources = PdfDict() - - # Find or create the parent's xobject dictionary - xobjdict = resources.XObject - if xobjdict is None: - xobjdict = resources.XObject = PdfDict() - - # Allow for an infinite number of cascaded watermarks - index = 0 - while 1: - watermark_name = '/Watermark.%d' % index - if watermark_name not in xobjdict: - break - index += 1 - xobjdict[watermark_name] = watermark - - # Turn the contents into an array if it is not already one - contents = page.Contents - if not isinstance(contents, PdfArray): - contents = page.Contents = PdfArray([contents]) - - # Save initial state before executing page - contents.insert(0, IndirectPdfDict(stream='q\n')) - - # Restore initial state and append the watermark - contents.append(IndirectPdfDict(stream='Q %s Do\n' % watermark_name)) - return page - -def watermark(input_fname, watermark_fname, output_fname=None): - outfn = output_fname or ('watermark.' + os.path.basename(input_fname)) - w = pagexobj(PdfReader(watermark_fname).pages[0]) - pages = PdfReader(input_fname).pages - PdfWriter().addpages([fixpage(x, w) for x in pages]).write(outfn) - return outfn - -def fix_pdf(fname, watermark_fname, indir, outdir): - from os import mkdir, path - if not path.exists(outdir): - mkdir(outdir) - watermark = pagexobj(PdfReader(watermark_fname).pages[0]) - trailer = PdfReader(path.join(indir, fname)) - for page in trailer.pages: - fixpage(page, watermark) - PdfWriter().write(path.join(outdir, fname), trailer) - return len(trailer.pages) - -def batch_watermark(pdfdir, watermark_fname, outputdir='tmp'): - import traceback - from glob import glob - from os import path - fnames=glob(pdfdir+"/*.pdf") - total_pages = 0 - good_files = 0 - - for fname in fnames: - fname = fname.replace(pdfdir+'/','') - try: - total_pages += fix_pdf(fname, watermark_fname, pdfdir, outputdir) - good_files += 1 - print "%s OK" %fname - except Exception: - print "%s Failed miserably" %fname - print traceback.format_exc()[:2000] - #raise - - print "success %.2f%% %s pages" %((float(good_files)/len(fnames))*100, total_pages) - -if __name__ == "__main__": - - from optparse import OptionParser - parser = OptionParser(description = __doc__) - parser.add_option('-i', dest='input_fname', help='file name to be watermarked (pdf)') - parser.add_option('-w', dest='watermark_fname', help='watermark file name (pdf)') - parser.add_option('-d', dest='pdfdir', help='watermark all pdf files in this directory') - parser.add_option('-o', dest='outdir', help='outputdir used with option -d', default='tmp') - options, args = parser.parse_args() - - if options.input_fname and options.watermark_fname: - watermark = pagexobj(PdfReader(options.watermark_fname).pages[0]) - outfn = 'watermark.' + os.path.basename(options.input_fname) - pages = PdfReader(options.input_fname).pages - - PdfWriter().addpages([fixpage(x, watermark) for x in pages]).write(outfn) - - elif options.pdfdir and options.watermark_fname: - batch_watermark(options.pdfdir, options.watermark_fname, options.outdir) - - else: - parser.print_help() - -
\ No newline at end of file +from pdfrw import PdfReader, PdfWriter, PageMerge + +argv = sys.argv[1:] +underneath = '-u' in argv +if underneath: + del argv[argv.index('-u')] +inpfn, wmarkfn = argv +outfn = 'watermark.' + os.path.basename(inpfn) +wmark = PageMerge().add(PdfReader(wmarkfn).pages[0])[0] +trailer = PdfReader(inpfn) +for page in trailer.pages: + PageMerge(page).add(wmark, prepend=underneath).render() +PdfWriter().write(outfn, trailer) |