summaryrefslogtreecommitdiff
path: root/examples/extract.py
blob: 3756b4f6b1377c4f01f5f7cd5a86ef7d1a92eddd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python

'''
usage:   extract.py <some.pdf>

Locates Form XObjects and Image XObjects within the PDF,
and creates a new PDF containing these -- one per page.

Resulting file will be named extract.<some.pdf>

'''

import sys
import os

from pdfrw import PdfReader, PdfWriter
from pdfrw.findobjs import page_per_xobj


inpfn, = sys.argv[1:]
outfn = 'extract.' + os.path.basename(inpfn)
pages = list(page_per_xobj(PdfReader(inpfn).pages, margin=0.5*72))
if not pages:
    raise IndexError("No XObjects found")
writer = PdfWriter()
writer.addpages(pages)
writer.write(outfn)