summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rwxr-xr-xexamples/4up.py66
-rwxr-xr-xexamples/alter.py1
-rwxr-xr-xexamples/booklet.py60
-rwxr-xr-xexamples/cat.py35
-rwxr-xr-xexamples/extract.py27
-rw-r--r--examples/find_pdfrw.py33
-rwxr-xr-xexamples/metadata.py39
-rwxr-xr-xexamples/poster.py70
-rwxr-xr-xexamples/print_two.py44
-rwxr-xr-xexamples/rl1/4up.py1
-rwxr-xr-xexamples/rl1/booklet.py3
-rw-r--r--examples/rl1/find_pdfrw.py33
-rwxr-xr-xexamples/rl1/platypus_pdf_template.py30
-rwxr-xr-xexamples/rl1/subset.py7
-rwxr-xr-xexamples/rl2/copy.py2
-rw-r--r--examples/rl2/decodegraphics.py114
-rw-r--r--examples/rl2/find_pdfrw.py33
-rwxr-xr-xexamples/rotate.py4
-rwxr-xr-xexamples/subset.py1
-rwxr-xr-xexamples/unspread.py32
-rwxr-xr-xexamples/watermark.py123
21 files changed, 306 insertions, 452 deletions
diff --git a/examples/4up.py b/examples/4up.py
index 491af14..ad2bd3b 100755
--- a/examples/4up.py
+++ b/examples/4up.py
@@ -1,51 +1,33 @@
#!/usr/bin/env python
'''
-usage: 4up.py my.pdf firstpage lastpage
-
-Creates 4up.my.pdf
+usage: 4up.py my.pdf
+Creates 4up.my.pdf with a single output page for every
+4 input pages.
'''
import sys
import os
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfArray
-from pdfrw.buildxobj import pagexobj
-
-def get4(allpages):
- # Pull a maximum of 4 pages off the list
- pages = [pagexobj(x) for x in allpages[:4]]
- del allpages[:4]
-
- x_max = max(page.BBox[2] for page in pages)
- y_max = max(page.BBox[3] for page in pages)
-
- stream = []
- xobjdict = PdfDict()
- for index, page in enumerate(pages):
- x = x_max * (index & 1) / 2.0
- y = y_max * (index <= 1) / 2.0
- index = '/P%s' % index
- stream.append('q 0.5 0 0 0.5 %s %s cm %s Do Q\n' % (x, y, index))
- xobjdict[index] = page
-
- return PdfDict(
- Type = PdfName.Page,
- Contents = PdfDict(stream=''.join(stream)),
- MediaBox = PdfArray([0, 0, x_max, y_max]),
- Resources = PdfDict(XObject = xobjdict),
- )
-
-def go(inpfn, outfn):
- pages = PdfReader(inpfn).pages
- writer = PdfWriter()
- while pages:
- writer.addpage(get4(pages))
- writer.write(outfn)
-
-if __name__ == '__main__':
- inpfn, = sys.argv[1:]
- outfn = '4up.' + os.path.basename(inpfn)
- go(inpfn, outfn)
+from pdfrw import PdfReader, PdfWriter, PageMerge
+
+
+def get4(srcpages):
+ scale = 0.5
+ srcpages = PageMerge() + srcpages
+ x_increment, y_increment = (scale * i for i in srcpages.xobj_box[2:])
+ for i, page in enumerate(srcpages):
+ page.scale(scale)
+ page.x = x_increment if i & 1 else 0
+ page.y = 0 if i & 2 else y_increment
+ return srcpages.render()
+
+
+inpfn, = sys.argv[1:]
+outfn = '4up.' + os.path.basename(inpfn)
+pages = PdfReader(inpfn).pages
+writer = PdfWriter()
+for index in range(0, len(pages), 4):
+ writer.addpage(get4(pages[index:index + 4]))
+writer.write(outfn)
diff --git a/examples/alter.py b/examples/alter.py
index 1c6d4e1..45b9c76 100755
--- a/examples/alter.py
+++ b/examples/alter.py
@@ -12,7 +12,6 @@ Demonstrates making a slight alteration to a preexisting PDF file.
import sys
import os
-import find_pdfrw
from pdfrw import PdfReader, PdfWriter
inpfn, = sys.argv[1:]
diff --git a/examples/booklet.py b/examples/booklet.py
index 0b3be74..4758b08 100755
--- a/examples/booklet.py
+++ b/examples/booklet.py
@@ -5,61 +5,35 @@ usage: booklet.py my.pdf
Creates booklet.my.pdf
-Pages organized in a form suitable for booklet printing.
-
+Pages organized in a form suitable for booklet printing, e.g.
+to print 4 8.5x11 pages using a single 11x17 sheet (double-sided).
'''
import sys
import os
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, PdfDict, PdfArray, PdfName, IndirectPdfDict
-from pdfrw.buildxobj import pagexobj
-
-def fixpage(*pages):
- pages = [pagexobj(x) for x in pages]
+from pdfrw import PdfReader, PdfWriter, PageMerge
- class PageStuff(tuple):
- pass
- x = y = 0
- for i, page in enumerate(pages):
- index = '/P%s' % i
- shift_right = x and '1 0 0 1 %s 0 cm ' % x or ''
- stuff = PageStuff((index, page))
- stuff.stream = 'q %s%s Do Q\n' % (shift_right, index)
- x += page.BBox[2]
- y = max(y, page.BBox[3])
- pages[i] = stuff
-
- # Multiple copies of first page used as a placeholder to
- # get blank page on back.
- for p1, p2 in zip(pages, pages[1:]):
- if p1[1] is p2[1]:
- pages.remove(p1)
+def fixpage(*pages):
+ result = PageMerge() + (x for x in pages if x is not None)
+ result[-1].x += result[0].w
+ return result.render()
- return IndirectPdfDict(
- Type = PdfName.Page,
- Contents = PdfDict(stream=''.join(page.stream for page in pages)),
- MediaBox = PdfArray([0, 0, x, y]),
- Resources = PdfDict(
- XObject = PdfDict(pages),
- ),
- )
inpfn, = sys.argv[1:]
outfn = 'booklet.' + os.path.basename(inpfn)
-pages = PdfReader(inpfn).pages
+ipages = PdfReader(inpfn).pages
-# Use page1 as a marker to print a blank at the end
-if len(pages) & 1:
- pages.append(pages[0])
+# Make sure we have an even number
+if len(ipages) & 1:
+ ipages.append(None)
-bigpages = []
-while len(pages) > 2:
- bigpages.append(fixpage(pages.pop(), pages.pop(0)))
- bigpages.append(fixpage(pages.pop(0), pages.pop()))
+opages = []
+while len(ipages) > 2:
+ opages.append(fixpage(ipages.pop(), ipages.pop(0)))
+ opages.append(fixpage(ipages.pop(0), ipages.pop()))
-bigpages += pages
+opages += ipages
-PdfWriter().addpages(bigpages).write(outfn)
+PdfWriter().addpages(opages).write(outfn)
diff --git a/examples/cat.py b/examples/cat.py
new file mode 100755
index 0000000..86cf643
--- /dev/null
+++ b/examples/cat.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+'''
+usage: cat.py <first.pdf> [<next.pdf> ...]
+
+Creates cat.<first.pdf>
+
+This file demonstrates two features:
+
+1) Concatenating multiple input PDFs.
+
+2) adding metadata to the PDF.
+
+'''
+
+import sys
+import os
+
+from pdfrw import PdfReader, PdfWriter, IndirectPdfDict
+
+inputs = sys.argv[1:]
+assert inputs
+outfn = 'cat.' + os.path.basename(inputs[0])
+
+writer = PdfWriter()
+for inpfn in inputs:
+ writer.addpages(PdfReader(inpfn).pages)
+
+writer.trailer.Info = IndirectPdfDict(
+ Title='your title goes here',
+ Author='your name goes here',
+ Subject='what is it all about?',
+ Creator='some script goes here',
+)
+writer.write(outfn)
diff --git a/examples/extract.py b/examples/extract.py
new file mode 100755
index 0000000..3756b4f
--- /dev/null
+++ b/examples/extract.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+'''
+usage: extract.py <some.pdf>
+
+Locates Form XObjects and Image XObjects within the PDF,
+and creates a new PDF containing these -- one per page.
+
+Resulting file will be named extract.<some.pdf>
+
+'''
+
+import sys
+import os
+
+from pdfrw import PdfReader, PdfWriter
+from pdfrw.findobjs import page_per_xobj
+
+
+inpfn, = sys.argv[1:]
+outfn = 'extract.' + os.path.basename(inpfn)
+pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72))
+if not pages:
+ raise IndexError("No XObjects found")
+writer = PdfWriter()
+writer.addpages(pages)
+writer.write(outfn)
diff --git a/examples/find_pdfrw.py b/examples/find_pdfrw.py
deleted file mode 100644
index f69d412..0000000
--- a/examples/find_pdfrw.py
+++ /dev/null
@@ -1,33 +0,0 @@
-'''
- find_xxx.py -- Find the place in the tree where xxx lives.
-
- Ways to use:
- 1) Make a copy, change 'xxx' in package to be your name; or
- 2) Under Linux, just ln -s to where this is in the right tree
-
- Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting
-'''
-
-import sys
-import os
-
-myname = __name__[5:] # remove 'find_'
-myname = os.path.join(myname, '__init__.py')
-
-def trypath(newpath):
- path = None
- while path != newpath:
- path = newpath
- if os.path.exists(os.path.join(path, myname)):
- return path
- newpath = os.path.dirname(path)
-
-root = trypath(__file__) or trypath(os.path.realpath(__file__))
-
-if root is None:
- print
- print 'Warning: %s: Could not find path to development package %s' % (__file__, myname)
- print ' The import will either fail or will use system-installed libraries'
- print
-elif root not in sys.path:
- sys.path.append(root)
diff --git a/examples/metadata.py b/examples/metadata.py
deleted file mode 100755
index 9c69108..0000000
--- a/examples/metadata.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env python
-
-'''
-usage: metadata.py <first.pdf> [<next.pdf> ...]
-
-Creates output.pdf
-
-This file demonstrates two features:
-
-1) Concatenating multiple input PDFs.
-
-2) adding metadata to the PDF.
-
-If you do not need to add metadata, look at subset.py, which
-has a simpler interface to PdfWriter.
-
-'''
-
-import sys
-import os
-
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, IndirectPdfDict
-
-inputs = sys.argv[1:]
-assert inputs
-outfn = 'output.pdf'
-
-writer = PdfWriter()
-for inpfn in inputs:
- writer.addpages(PdfReader(inpfn.pages)
-
-writer.trailer.Info = IndirectPdfDict(
- Title = 'your title goes here',
- Author = 'your name goes here',
- Subject = 'what is it all about?',
- Creator = 'some script goes here',
-)
-writer.write(outfn)
diff --git a/examples/poster.py b/examples/poster.py
index ee58282..7f1c1c2 100755
--- a/examples/poster.py
+++ b/examples/poster.py
@@ -7,51 +7,37 @@ Shows how to change the size on a PDF.
Motivation:
-My daughter needed to create a 48" x 36" poster, but her Mac version of Powerpoint
-only wanted to output 8.5" x 11" for some reason.
+My daughter needed to create a 48" x 36" poster, but her Mac
+version of Powerpoint only wanted to output 8.5" x 11" for
+some reason.
+
+So she did an 8.5x11" output with 0.5" margin all around
+(actual size of useful area 7.5x10") and we scaled it
+up by 4.8.
+
+We also copy the Info dict to the new PDF.
'''
import sys
import os
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfArray, IndirectPdfDict
-from pdfrw.buildxobj import pagexobj
-
-def adjust(page):
- page = pagexobj(page)
- assert page.BBox == [0, 0, 11 * 72, int(8.5 * 72)], page.BBox
- margin = 72 // 2
- old_x, old_y = page.BBox[2] - 2 * margin, page.BBox[3] - 2 * margin
-
- new_x, new_y = 48 * 72, 36 * 72
- ratio = 1.0 * new_x / old_x
- assert ratio == 1.0 * new_y / old_y
-
- index = '/BasePage'
- x = -margin * ratio
- y = -margin * ratio
- stream = 'q %0.2f 0 0 %0.2f %s %s cm %s Do Q\n' % (ratio, ratio, x, y, index)
- xobjdict = PdfDict()
- xobjdict[index] = page
-
- return PdfDict(
- Type = PdfName.Page,
- Contents = PdfDict(stream=stream),
- MediaBox = PdfArray([0, 0, new_x, new_y]),
- Resources = PdfDict(XObject = xobjdict),
- )
-
-def go(inpfn, outfn):
- reader = PdfReader(inpfn)
- page, = reader.pages
- writer = PdfWriter()
- writer.addpage(adjust(page))
- writer.trailer.Info = IndirectPdfDict(reader.Info)
- writer.write(outfn)
-
-if __name__ == '__main__':
- inpfn, = sys.argv[1:]
- outfn = 'poster.' + os.path.basename(inpfn)
- go(inpfn, outfn)
+from pdfrw import PdfReader, PdfWriter, PageMerge, IndirectPdfDict
+
+
+def adjust(page, margin=36, scale=4.8):
+ info = PageMerge().add(page)
+ x1, y1, x2, y2 = info.xobj_box
+ viewrect = (margin, margin, x2 - x1 - 2 * margin, y2 - y1 - 2 * margin)
+ page = PageMerge().add(page, viewrect=viewrect)
+ page[0].scale(scale)
+ return page.render()
+
+
+inpfn, = sys.argv[1:]
+outfn = 'poster.' + os.path.basename(inpfn)
+reader = PdfReader(inpfn)
+writer = PdfWriter()
+writer.addpage(adjust(reader.pages[0]))
+writer.trailer.Info = IndirectPdfDict(reader.Info or {})
+writer.write(outfn)
diff --git a/examples/print_two.py b/examples/print_two.py
index d168f68..c54eaee 100755
--- a/examples/print_two.py
+++ b/examples/print_two.py
@@ -7,52 +7,26 @@ Creates print_two.my.pdf
This is only useful when you can cut down sheets of paper to make two
small documents. Works for double-sided only right now.
-
'''
import sys
import os
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, PdfArray, IndirectPdfDict
+from pdfrw import PdfReader, PdfWriter, PageMerge
+
def fixpage(page, count=[0]):
count[0] += 1
- evenpage = not (count[0] & 1)
-
- # For demo purposes, just go with the MediaBox and toast the others
- box = [float(x) for x in page.MediaBox]
- assert box[0] == box[1] == 0, "demo won't work on this PDF"
-
- for key, value in sorted(page.iteritems()):
- if 'box' in key.lower():
- del page[key]
-
- startsize = tuple(box[2:])
- finalsize = box[3], 2 * box[2]
- page.MediaBox = PdfArray((0, 0) + finalsize)
- page.Rotate = (int(page.Rotate or 0) + 90) % 360
-
- contents = page.Contents
- if contents is None:
- return page
- contents = isinstance(contents, dict) and [contents] or contents
-
- prefix = '0 1 -1 0 %s %s cm\n' % (finalsize[0], 0)
- if evenpage:
- prefix = '1 0 0 1 %s %s cm\n' % (0, finalsize[1]/2) + prefix
- first_prefix = 'q\n-1 0 0 -1 %s %s cm\n' % finalsize + prefix
- second_prefix = '\nQ\n' + prefix
- first_prefix = IndirectPdfDict(stream=first_prefix)
- second_prefix = IndirectPdfDict(stream=second_prefix)
- contents = PdfArray(([second_prefix] + contents) * 2)
- contents[0] = first_prefix
- page.Contents = contents
- return page
+ oddpage = (count[0] & 1)
+
+ result = PageMerge()
+ for rotation in (180 + 180 * oddpage, 180 * oddpage):
+ result.add(page, rotate=rotation)
+ result[1].x = result[0].w
+ return result.render()
inpfn, = sys.argv[1:]
outfn = 'print_two.' + os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
-
PdfWriter().addpages(fixpage(x) for x in pages).write(outfn)
diff --git a/examples/rl1/4up.py b/examples/rl1/4up.py
index 385103a..49f766e 100755
--- a/examples/rl1/4up.py
+++ b/examples/rl1/4up.py
@@ -15,7 +15,6 @@ import os
from reportlab.pdfgen.canvas import Canvas
-import find_pdfrw
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
diff --git a/examples/rl1/booklet.py b/examples/rl1/booklet.py
index 23f8a72..e7764a0 100755
--- a/examples/rl1/booklet.py
+++ b/examples/rl1/booklet.py
@@ -15,7 +15,6 @@ import os
from reportlab.pdfgen.canvas import Canvas
-import find_pdfrw
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
@@ -43,7 +42,7 @@ def make_pdf(outfn, xobjpairs):
x += xobj.BBox[2]
y = max(y, xobj.BBox[3])
- canvas.setPageSize((x,y))
+ canvas.setPageSize((x, y))
# Handle blank back page
if len(xobjlist) > 1 and xobjlist[0] == xobjlist[-1]:
diff --git a/examples/rl1/find_pdfrw.py b/examples/rl1/find_pdfrw.py
deleted file mode 100644
index f69d412..0000000
--- a/examples/rl1/find_pdfrw.py
+++ /dev/null
@@ -1,33 +0,0 @@
-'''
- find_xxx.py -- Find the place in the tree where xxx lives.
-
- Ways to use:
- 1) Make a copy, change 'xxx' in package to be your name; or
- 2) Under Linux, just ln -s to where this is in the right tree
-
- Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting
-'''
-
-import sys
-import os
-
-myname = __name__[5:] # remove 'find_'
-myname = os.path.join(myname, '__init__.py')
-
-def trypath(newpath):
- path = None
- while path != newpath:
- path = newpath
- if os.path.exists(os.path.join(path, myname)):
- return path
- newpath = os.path.dirname(path)
-
-root = trypath(__file__) or trypath(os.path.realpath(__file__))
-
-if root is None:
- print
- print 'Warning: %s: Could not find path to development package %s' % (__file__, myname)
- print ' The import will either fail or will use system-installed libraries'
- print
-elif root not in sys.path:
- sys.path.append(root)
diff --git a/examples/rl1/platypus_pdf_template.py b/examples/rl1/platypus_pdf_template.py
index 8c85794..7e4769a 100755
--- a/examples/rl1/platypus_pdf_template.py
+++ b/examples/rl1/platypus_pdf_template.py
@@ -1,17 +1,18 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
-usage: platypus_pdf_template.py output.pdf pdf_file_to_use_as_template.pdf
+usage: platypus_pdf_template.py source.pdf
-Example of using pdfrw to use a pdf (page one) as the background for all
-other pages together with platypus.
+Creates platypus.source.pdf
-There is a table of contents in this example for completeness sake.
+Example of using pdfrw to use page 1 of a source PDF as the background
+for other pages programmatically generated with Platypus.
Contributed by user asannes
"""
import sys
+import os
from reportlab.platypus import PageTemplate, BaseDocTemplate, Frame
from reportlab.platypus import NextPageTemplate, Paragraph, PageBreak
@@ -21,7 +22,6 @@ from reportlab.rl_config import defaultPageSize
from reportlab.lib.units import inch
from reportlab.graphics import renderPDF
-import find_pdfrw
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
@@ -29,6 +29,7 @@ from pdfrw.toreportlab import makerl
PAGE_WIDTH = defaultPageSize[0]
PAGE_HEIGHT = defaultPageSize[1]
+
class MyTemplate(PageTemplate):
"""The kernel of this example, where we use pdfrw to fill in the
background of a page before writing to it. This could be used to fill
@@ -57,6 +58,7 @@ class MyTemplate(PageTemplate):
canvas.doForm(rl_obj)
canvas.restoreState()
+
class MyDocTemplate(BaseDocTemplate):
"""Used to apply heading to table of contents."""
@@ -70,20 +72,22 @@ class MyDocTemplate(BaseDocTemplate):
self.canv.bookmarkPage(key)
self.notify('TOCEntry', [1, text, self.page, key])
+
def create_toc():
"""Creates the table of contents"""
table_of_contents = TableOfContents()
table_of_contents.dotsMinLevel = 0
- header1 = ParagraphStyle(name = 'Heading1', fontSize = 16, leading = 16)
- header2 = ParagraphStyle(name = 'Heading2', fontSize = 14, leading = 14)
+ header1 = ParagraphStyle(name='Heading1', fontSize=16, leading=16)
+ header2 = ParagraphStyle(name='Heading2', fontSize=14, leading=14)
table_of_contents.levelStyles = [header1, header2]
return [table_of_contents, PageBreak()]
+
def create_pdf(filename, pdf_template_filename):
"""Create the pdf, with all the contents"""
- pdf_report = open(filename, "w")
+ pdf_report = open(filename, "wb")
document = MyDocTemplate(pdf_report)
- templates = [ MyTemplate(pdf_template_filename, name='background') ]
+ templates = [MyTemplate(pdf_template_filename, name='background')]
document.addPageTemplates(templates)
styles = getSampleStyleSheet()
@@ -99,8 +103,6 @@ def create_pdf(filename, pdf_template_filename):
if __name__ == '__main__':
- try:
- output, template = sys.argv[1:]
- create_pdf(output, template)
- except ValueError:
- print "Usage: %s <output> <template>" % (sys.argv[0])
+ template, = sys.argv[1:]
+ output = 'platypus_pdf_template.' + os.path.basename(template)
+ create_pdf(output, template)
diff --git a/examples/rl1/subset.py b/examples/rl1/subset.py
index 8b2bf08..c05056c 100755
--- a/examples/rl1/subset.py
+++ b/examples/rl1/subset.py
@@ -17,7 +17,6 @@ import os
from reportlab.pdfgen.canvas import Canvas
-import find_pdfrw
from pdfrw import PdfReader
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
@@ -25,14 +24,14 @@ from pdfrw.toreportlab import makerl
def go(inpfn, firstpage, lastpage):
firstpage, lastpage = int(firstpage), int(lastpage)
- outfn = 'subset_%s_to_%s.%s' % (firstpage, lastpage, os.path.basename(inpfn))
+ outfn = 'subset.' + os.path.basename(inpfn)
pages = PdfReader(inpfn).pages
- pages = [pagexobj(x) for x in pages[firstpage-1:lastpage]]
+ pages = [pagexobj(x) for x in pages[firstpage - 1:lastpage]]
canvas = Canvas(outfn)
for page in pages:
- canvas.setPageSize(tuple(page.BBox[2:]))
+ canvas.setPageSize((page.BBox[2], page.BBox[3]))
canvas.doForm(makerl(canvas, page))
canvas.showPage()
diff --git a/examples/rl2/copy.py b/examples/rl2/copy.py
index 20dad71..66fe0c6 100755
--- a/examples/rl2/copy.py
+++ b/examples/rl2/copy.py
@@ -20,7 +20,7 @@ from pdfrw import PdfReader, PdfWriter, PdfArray
inpfn, = sys.argv[1:]
outfn = 'copy.' + os.path.basename(inpfn)
-pages = PdfReader(inpfn).pages
+pages = PdfReader(inpfn, decompress=True).pages
canvas = Canvas(outfn, pageCompression=0)
for page in pages:
diff --git a/examples/rl2/decodegraphics.py b/examples/rl2/decodegraphics.py
index 9fa5a05..e2f3a9f 100644
--- a/examples/rl2/decodegraphics.py
+++ b/examples/rl2/decodegraphics.py
@@ -1,4 +1,4 @@
-# A part of pdfrw (pdfrw.googlecode.com)
+# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
@@ -13,13 +13,13 @@ Better to use Form XObjects for most things (see the example in rl1).
'''
from inspect import getargspec
-import find_pdfrw
from pdfrw import PdfTokens
-from pdfrw.pdfobjects import PdfString
+from pdfrw.objects import PdfString
#############################################################################
# Graphics parsing
+
def parse_array(self, token='[', params=None):
mylist = []
for token in self.tokens:
@@ -28,105 +28,134 @@ def parse_array(self, token='[', params=None):
mylist.append(token)
self.params.append(mylist)
+
def parse_savestate(self, token='q', params=''):
self.canv.saveState()
+
def parse_restorestate(self, token='Q', params=''):
self.canv.restoreState()
+
def parse_transform(self, token='cm', params='ffffff'):
self.canv.transform(*params)
+
def parse_linewidth(self, token='w', params='f'):
self.canv.setLineWidth(*params)
+
def parse_linecap(self, token='J', params='i'):
self.canv.setLineCap(*params)
+
def parse_linejoin(self, token='j', params='i'):
self.canv.setLineJoin(*params)
+
def parse_miterlimit(self, token='M', params='f'):
self.canv.setMiterLimit(*params)
+
def parse_dash(self, token='d', params='as'): # Array, string
self.canv.setDash(*params)
+
def parse_intent(self, token='ri', params='n'):
# TODO: add logging
pass
+
def parse_flatness(self, token='i', params='i'):
# TODO: add logging
pass
+
def parse_gstate(self, token='gs', params='n'):
# TODO: add logging
# Could parse stuff we care about from here later
pass
+
def parse_move(self, token='m', params='ff'):
if self.gpath is None:
self.gpath = self.canv.beginPath()
self.gpath.moveTo(*params)
self.current_point = params
+
def parse_line(self, token='l', params='ff'):
self.gpath.lineTo(*params)
self.current_point = params
+
def parse_curve(self, token='c', params='ffffff'):
self.gpath.curveTo(*params)
self.current_point = params[-2:]
+
def parse_curve1(self, token='v', params='ffff'):
parse_curve(self, token, tuple(self.current_point) + tuple(params))
+
def parse_curve2(self, token='y', params='ffff'):
parse_curve(self, token, tuple(params) + tuple(params[-2:]))
+
def parse_close(self, token='h', params=''):
self.gpath.close()
+
def parse_rect(self, token='re', params='ffff'):
if self.gpath is None:
self.gpath = self.canv.beginPath()
self.gpath.rect(*params)
self.current_point = params[-2:]
+
def parse_stroke(self, token='S', params=''):
finish_path(self, 1, 0, 0)
+
def parse_close_stroke(self, token='s', params=''):
self.gpath.close()
finish_path(self, 1, 0, 0)
+
def parse_fill(self, token='f', params=''):
finish_path(self, 0, 1, 1)
+
def parse_fill_compat(self, token='F', params=''):
finish_path(self, 0, 1, 1)
+
def parse_fill_even_odd(self, token='f*', params=''):
finish_path(self, 0, 1, 0)
+
def parse_fill_stroke_even_odd(self, token='B*', params=''):
finish_path(self, 1, 1, 0)
+
def parse_fill_stroke(self, token='B', params=''):
finish_path(self, 1, 1, 1)
+
def parse_close_fill_stroke_even_odd(self, token='b*', params=''):
self.gpath.close()
finish_path(self, 1, 1, 0)
+
def parse_close_fill_stroke(self, token='b', params=''):
self.gpath.close()
finish_path(self, 1, 1, 1)
+
def parse_nop(self, token='n', params=''):
finish_path(self, 0, 0, 0)
+
def finish_path(self, stroke, fill, fillmode):
if self.gpath is not None:
canv = self.canv
@@ -135,39 +164,49 @@ def finish_path(self, stroke, fill, fillmode):
canv._fillMode = oldmode
self.gpath = None
+
def parse_clip_path(self, token='W', params=''):
# TODO: add logging
pass
+
def parse_clip_path_even_odd(self, token='W*', params=''):
# TODO: add logging
pass
+
def parse_stroke_gray(self, token='G', params='f'):
self.canv.setStrokeGray(*params)
+
def parse_fill_gray(self, token='g', params='f'):
self.canv.setFillGray(*params)
+
def parse_stroke_rgb(self, token='RG', params='fff'):
self.canv.setStrokeColorRGB(*params)
+
def parse_fill_rgb(self, token='rg', params='fff'):
self.canv.setFillColorRGB(*params)
+
def parse_stroke_cmyk(self, token='K', params='ffff'):
self.canv.setStrokeColorCMYK(*params)
+
def parse_fill_cmyk(self, token='k', params='ffff'):
self.canv.setFillColorCMYK(*params)
#############################################################################
# Text parsing
+
def parse_begin_text(self, token='BT', params=''):
assert self.tpath is None
self.tpath = self.canv.beginText()
+
def parse_text_transform(self, token='Tm', params='ffffff'):
path = self.tpath
@@ -177,20 +216,23 @@ def parse_text_transform(self, token='Tm', params='ffffff'):
except AttributeError:
pass
else:
- if code[-1] == '1 0 0 1 0 0 Tm':
+ if code[-1] == '1 0 0 1 0 0 Tm':
code.pop()
path.setTextTransform(*params)
+
def parse_setfont(self, token='Tf', params='nf'):
fontinfo = self.fontdict[params[0]]
self.tpath._setFont(fontinfo.name, params[1])
self.curfont = fontinfo
+
def parse_text_out(self, token='Tj', params='t'):
text = params[0].decode(self.curfont.remap, self.curfont.twobyte)
self.tpath.textOut(text)
+
def parse_TJ(self, token='TJ', params='a'):
remap = self.curfont.remap
twobyte = self.curfont.twobyte
@@ -204,41 +246,52 @@ def parse_TJ(self, token='TJ', params='a'):
text = ''.join(result)
self.tpath.textOut(text)
+
def parse_end_text(self, token='ET', params=''):
assert self.tpath is not None
self.canv.drawText(self.tpath)
- self.tpath=None
+ self.tpath = None
+
def parse_move_cursor(self, token='Td', params='ff'):
self.tpath.moveCursor(params[0], -params[1])
+
def parse_set_leading(self, token='TL', params='f'):
self.tpath.setLeading(*params)
+
def parse_text_line(self, token='T*', params=''):
self.tpath.textLine()
+
def parse_set_char_space(self, token='Tc', params='f'):
self.tpath.setCharSpace(*params)
+
def parse_set_word_space(self, token='Tw', params='f'):
self.tpath.setWordSpace(*params)
+
def parse_set_hscale(self, token='Tz', params='f'):
self.tpath.setHorizScale(params[0] - 100)
+
def parse_set_rise(self, token='Ts', params='f'):
self.tpath.setRise(*params)
+
def parse_xobject(self, token='Do', params='n'):
# TODO: Need to do this
pass
+
class FontInfo(object):
''' Pretty basic -- needs a lot of work to work right for all fonts
'''
lookup = {
- 'BitstreamVeraSans' : 'Helvetica', # WRONG -- have to learn about font stuff...
+ # WRONG -- have to learn about font stuff...
+ 'BitstreamVeraSans': 'Helvetica',
}
def __init__(self, source):
@@ -254,16 +307,19 @@ class FontInfo(object):
assert not len(info) & 1
info2 = []
for x in info:
- assert x[0] == '<' and x[-1] == '>' and len(x) in (4,6), x
+ assert x[0] == '<' and x[-1] == '>' and len(x) in (4, 6), x
i = int(x[1:-1], 16)
info2.append(i)
- self.remap = dict((x,chr(y)) for (x,y) in zip(info2[::2], info2[1::2])).get
+ self.remap = dict((x, chr(y)) for (x, y) in
+ zip(info2[::2], info2[1::2])).get
self.twobyte = len(info[0]) > 4
#############################################################################
# Control structures
+
def findparsefuncs():
+
def checkname(n):
assert n.startswith('/')
return n
@@ -276,8 +332,10 @@ def findparsefuncs():
assert isinstance(t, PdfString)
return t
- fixparam = dict(f=float, i=int, n=checkname, a=checkarray, s=str, t=checktext)
+ fixparam = dict(f=float, i=int, n=checkname, a=checkarray,
+ s=str, t=checktext)
fixcache = {}
+
def fixlist(params):
try:
result = fixcache[params]
@@ -288,12 +346,12 @@ def findparsefuncs():
dispatch = {}
expected_args = 'self token params'.split()
- for key, func in globals().iteritems():
+ for key, func in globals().items():
if key.startswith('parse_'):
args, varargs, keywords, defaults = getargspec(func)
- assert args == expected_args and varargs is None \
- and keywords is None and len(defaults) == 2, \
- (key, args, varargs, keywords, defaults)
+ assert (args == expected_args and varargs is None and
+ keywords is None and len(defaults) == 2), (
+ key, args, varargs, keywords, defaults)
token, params = defaults
if params is not None:
params = fixlist(params)
@@ -301,6 +359,7 @@ def findparsefuncs():
assert dispatch.setdefault(token, value) is value, repr(token)
return dispatch
+
class _ParseClass(object):
dispatch = findparsefuncs()
@@ -309,14 +368,16 @@ class _ParseClass(object):
self = cls()
contents = page.Contents
if contents.Filter is not None:
- raise SystemExit('Cannot parse graphics -- page encoded with %s' % contents.Filter)
+ raise SystemExit('Cannot parse graphics -- page encoded with %s'
+ % contents.Filter)
dispatch = cls.dispatch.get
self.tokens = tokens = iter(PdfTokens(contents.stream))
self.params = params = []
self.canv = canvas
self.gpath = None
self.tpath = None
- self.fontdict = dict((x,FontInfo(y)) for (x, y) in page.Resources.Font.iteritems())
+ self.fontdict = dict((x, FontInfo(y)) for
+ (x, y) in page.Resources.Font.iteritems())
for token in self.tokens:
info = dispatch(token)
@@ -330,35 +391,40 @@ class _ParseClass(object):
delta = len(params) - len(paraminfo)
if delta:
if delta < 0:
- print 'Operator %s expected %s parameters, got %s' % (token, len(paraminfo), params)
+ print ('Operator %s expected %s parameters, got %s' %
+ (token, len(paraminfo), params))
params[:] = []
continue
else:
- print "Unparsed parameters/commands:", params[:delta]
+ print ("Unparsed parameters/commands: %s" % params[:delta])
del params[:delta]
paraminfo = zip(paraminfo, params)
try:
- params[:] = [x(y) for (x,y) in paraminfo]
+ params[:] = [x(y) for (x, y) in paraminfo]
except:
- for i, (x,y) in enumerate(paraminfo):
+ for i, (x, y) in enumerate(paraminfo):
try:
x(y)
except:
- raise # For now
+ raise # For now
continue
func(self, token, params)
params[:] = []
-def debugparser(undisturbed = set('parse_array'.split())):
+
+def debugparser(undisturbed=set('parse_array'.split())):
def debugdispatch():
def getvalue(oldval):
name = oldval[0].__name__
+
def myfunc(self, token, params):
- print '%s called %s(%s)' % (token, name, ', '.join(str(x) for x in params))
+ print ('%s called %s(%s)' % (token, name,
+ ', '.join(str(x) for x in params)))
if name in undisturbed:
myfunc = oldval[0]
return myfunc, oldval[1]
- return dict((x, getvalue(y)) for (x,y) in _ParseClass.dispatch.iteritems())
+ return dict((x, getvalue(y))
+ for (x, y) in _ParseClass.dispatch.iteritems())
class _DebugParse(_ParseClass):
dispatch = debugdispatch()
@@ -374,5 +440,5 @@ if __name__ == '__main__':
fname, = sys.argv[1:]
pdf = PdfReader(fname)
for i, page in enumerate(pdf.pages):
- print '\nPage %s ------------------------------------' % i
+ print ('\nPage %s ------------------------------------' % i)
parse(page)
diff --git a/examples/rl2/find_pdfrw.py b/examples/rl2/find_pdfrw.py
deleted file mode 100644
index f69d412..0000000
--- a/examples/rl2/find_pdfrw.py
+++ /dev/null
@@ -1,33 +0,0 @@
-'''
- find_xxx.py -- Find the place in the tree where xxx lives.
-
- Ways to use:
- 1) Make a copy, change 'xxx' in package to be your name; or
- 2) Under Linux, just ln -s to where this is in the right tree
-
- Created by Pat Maupin, who doesn't consider it big enough to be worth copyrighting
-'''
-
-import sys
-import os
-
-myname = __name__[5:] # remove 'find_'
-myname = os.path.join(myname, '__init__.py')
-
-def trypath(newpath):
- path = None
- while path != newpath:
- path = newpath
- if os.path.exists(os.path.join(path, myname)):
- return path
- newpath = os.path.dirname(path)
-
-root = trypath(__file__) or trypath(os.path.realpath(__file__))
-
-if root is None:
- print
- print 'Warning: %s: Could not find path to development package %s' % (__file__, myname)
- print ' The import will either fail or will use system-installed libraries'
- print
-elif root not in sys.path:
- sys.path.append(root)
diff --git a/examples/rotate.py b/examples/rotate.py
index 4277940..8b10d05 100755
--- a/examples/rotate.py
+++ b/examples/rotate.py
@@ -13,7 +13,6 @@ Creates rotate.my.pdf with selected pages rotated. Rotates all by default.
import sys
import os
-import find_pdfrw
from pdfrw import PdfReader, PdfWriter
inpfn = sys.argv[1]
@@ -34,7 +33,8 @@ if not ranges:
for onerange in ranges:
onerange = (onerange + onerange[-1:])[:2]
for pagenum in range(onerange[0]-1, onerange[1]):
- pages[pagenum].Rotate = (int(pages[pagenum].inheritable.Rotate or 0) + rotate) % 360
+ pages[pagenum].Rotate = (int(pages[pagenum].inheritable.Rotate or
+ 0) + rotate) % 360
outdata = PdfWriter()
outdata.trailer = trailer
diff --git a/examples/subset.py b/examples/subset.py
index e7f8ec6..30a577a 100755
--- a/examples/subset.py
+++ b/examples/subset.py
@@ -11,7 +11,6 @@ Creates subset.my.pdf
import sys
import os
-import find_pdfrw
from pdfrw import PdfReader, PdfWriter
inpfn = sys.argv[1]
diff --git a/examples/unspread.py b/examples/unspread.py
new file mode 100755
index 0000000..4b3bc5d
--- /dev/null
+++ b/examples/unspread.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+'''
+usage: unspread.py my.pdf
+
+Creates unspread.my.pdf
+
+Chops each page in half, e.g. if a source were
+created in booklet form, you could extract individual
+pages.
+'''
+
+import sys
+import os
+
+from pdfrw import PdfReader, PdfWriter, PageMerge
+
+
+def splitpage(src):
+ ''' Split a page into two (left and right)
+ '''
+ # Yield a result for each half of the page
+ for x_pos in (0, 0.5):
+ yield PageMerge().add(src, viewrect=(x_pos, 0, 0.5, 1)).render()
+
+
+inpfn, = sys.argv[1:]
+outfn = 'unspread.' + os.path.basename(inpfn)
+writer = PdfWriter()
+for page in PdfReader(inpfn).pages:
+ writer.addpages(splitpage(page))
+writer.write(outfn)
diff --git a/examples/watermark.py b/examples/watermark.py
index a4d933c..96b686b 100755
--- a/examples/watermark.py
+++ b/examples/watermark.py
@@ -3,112 +3,31 @@
'''
Simple example of watermarking using form xobjects (pdfrw).
-usage: watermark.py my.pdf single_page.pdf
+usage: watermark.py [-u] my.pdf single_page.pdf
Creates watermark.my.pdf, with every page overlaid with
-first page from single_page.pdf
+first page from single_page.pdf. If -u is selected, watermark
+will be placed underneath page (painted first).
+
+NB: At one point, this example was extremely complicated, with
+ multiple options. That only led to errors in implementation,
+ so it has been re-simplified in order to show basic principles
+ of the library operation and to match the other examples better.
'''
import sys
import os
-import find_pdfrw
-from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, IndirectPdfDict, PdfArray
-from pdfrw.buildxobj import pagexobj
-
-def fixpage(page, watermark):
-
- # Find the page's resource dictionary. Create if none
- resources = page.inheritable.Resources
- if resources is None:
- resources = page.Resources = PdfDict()
-
- # Find or create the parent's xobject dictionary
- xobjdict = resources.XObject
- if xobjdict is None:
- xobjdict = resources.XObject = PdfDict()
-
- # Allow for an infinite number of cascaded watermarks
- index = 0
- while 1:
- watermark_name = '/Watermark.%d' % index
- if watermark_name not in xobjdict:
- break
- index += 1
- xobjdict[watermark_name] = watermark
-
- # Turn the contents into an array if it is not already one
- contents = page.Contents
- if not isinstance(contents, PdfArray):
- contents = page.Contents = PdfArray([contents])
-
- # Save initial state before executing page
- contents.insert(0, IndirectPdfDict(stream='q\n'))
-
- # Restore initial state and append the watermark
- contents.append(IndirectPdfDict(stream='Q %s Do\n' % watermark_name))
- return page
-
-def watermark(input_fname, watermark_fname, output_fname=None):
- outfn = output_fname or ('watermark.' + os.path.basename(input_fname))
- w = pagexobj(PdfReader(watermark_fname).pages[0])
- pages = PdfReader(input_fname).pages
- PdfWriter().addpages([fixpage(x, w) for x in pages]).write(outfn)
- return outfn
-
-def fix_pdf(fname, watermark_fname, indir, outdir):
- from os import mkdir, path
- if not path.exists(outdir):
- mkdir(outdir)
- watermark = pagexobj(PdfReader(watermark_fname).pages[0])
- trailer = PdfReader(path.join(indir, fname))
- for page in trailer.pages:
- fixpage(page, watermark)
- PdfWriter().write(path.join(outdir, fname), trailer)
- return len(trailer.pages)
-
-def batch_watermark(pdfdir, watermark_fname, outputdir='tmp'):
- import traceback
- from glob import glob
- from os import path
- fnames=glob(pdfdir+"/*.pdf")
- total_pages = 0
- good_files = 0
-
- for fname in fnames:
- fname = fname.replace(pdfdir+'/','')
- try:
- total_pages += fix_pdf(fname, watermark_fname, pdfdir, outputdir)
- good_files += 1
- print "%s OK" %fname
- except Exception:
- print "%s Failed miserably" %fname
- print traceback.format_exc()[:2000]
- #raise
-
- print "success %.2f%% %s pages" %((float(good_files)/len(fnames))*100, total_pages)
-
-if __name__ == "__main__":
-
- from optparse import OptionParser
- parser = OptionParser(description = __doc__)
- parser.add_option('-i', dest='input_fname', help='file name to be watermarked (pdf)')
- parser.add_option('-w', dest='watermark_fname', help='watermark file name (pdf)')
- parser.add_option('-d', dest='pdfdir', help='watermark all pdf files in this directory')
- parser.add_option('-o', dest='outdir', help='outputdir used with option -d', default='tmp')
- options, args = parser.parse_args()
-
- if options.input_fname and options.watermark_fname:
- watermark = pagexobj(PdfReader(options.watermark_fname).pages[0])
- outfn = 'watermark.' + os.path.basename(options.input_fname)
- pages = PdfReader(options.input_fname).pages
-
- PdfWriter().addpages([fixpage(x, watermark) for x in pages]).write(outfn)
-
- elif options.pdfdir and options.watermark_fname:
- batch_watermark(options.pdfdir, options.watermark_fname, options.outdir)
-
- else:
- parser.print_help()
-
- \ No newline at end of file
+from pdfrw import PdfReader, PdfWriter, PageMerge
+
+argv = sys.argv[1:]
+underneath = '-u' in argv
+if underneath:
+ del argv[argv.index('-u')]
+inpfn, wmarkfn = argv
+outfn = 'watermark.' + os.path.basename(inpfn)
+wmark = PageMerge().add(PdfReader(wmarkfn).pages[0])[0]
+trailer = PdfReader(inpfn)
+for page in trailer.pages:
+ PageMerge(page).add(wmark, prepend=underneath).render()
+PdfWriter().write(outfn, trailer)