summaryrefslogtreecommitdiff
path: root/src/pikepdf/_methods.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/pikepdf/_methods.py')
-rw-r--r--src/pikepdf/_methods.py319
1 files changed, 270 insertions, 49 deletions
diff --git a/src/pikepdf/_methods.py b/src/pikepdf/_methods.py
index 6c2b90b..ac6134c 100644
--- a/src/pikepdf/_methods.py
+++ b/src/pikepdf/_methods.py
@@ -12,22 +12,21 @@ bindings after the fact.
We can also move the implementation to C++ if desired.
"""
-from tempfile import NamedTemporaryFile
-from subprocess import run, PIPE
-from io import BytesIO
-
-from collections.abc import KeysView
-
import inspect
+from collections import namedtuple
+from collections.abc import KeysView
+from io import BytesIO
+from subprocess import PIPE, run
+from tempfile import NamedTemporaryFile
-from . import Pdf, Dictionary, Array, Name, Stream, Object
+from . import Array, Dictionary, Name, Object, Pdf, Stream
from ._qpdf import _ObjectMapping
-from .models import PdfMetadata
-
+from .models import PdfMetadata, Permissions, EncryptionInfo
# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
-def extends(cls_cpp):
+
+def augments(cls_cpp):
"""Attach methods of a Python support class to an existing class
This monkeypatches all methods defined in the support class onto an
@@ -35,37 +34,44 @@ def extends(cls_cpp):
.. code-block:: python
- @extends(ClassDefinedInCpp)
+ @augments(ClassDefinedInCpp)
class SupportClass:
def foo(self):
pass
- The method 'foo' will be monkeypatched on ClassDefinedInCpp. SupportClass
+ The Python method 'foo' will be monkeypatched on ClassDefinedInCpp. SupportClass
has no meaning on its own and should not be used, but gets returned from
this function so IDE code inspection doesn't get too confused.
We don't subclass because it's much more convenient to monkeypatch Python
methods onto the existing Python binding of the C++ class. For one thing,
this allows the implementation to be moved from Python to C++ or vice
- versa. It saves having to implement an intermediate subclass and then
- ensures that the superclass never 'leaks' to pikepdf users.
+ versa. It saves having to implement an intermediate Python subclass and then
+ ensures that the C++ superclass never 'leaks' to pikepdf users. Finally,
+ wrapper classes and subclasses can become problematic if the call stack
+ crosses the C++/Python boundary multiple times.
Any existing methods may be used, regardless of whether they are defined
elsewhere in the support class or in the target class.
+
+ The target class does not have to be C++ or derived from pybind11.
"""
- def real_class_extend(cls, cls_cpp=cls_cpp):
+ def class_augment(cls, cls_cpp=cls_cpp):
for name, fn in inspect.getmembers(cls, inspect.isfunction):
- fn.__qualname__ = fn.__qualname__.replace(
- cls.__name__, cls_cpp.__name__)
+ fn.__qualname__ = fn.__qualname__.replace(cls.__name__, cls_cpp.__name__)
setattr(cls_cpp, name, fn)
for name, fn in inspect.getmembers(cls, inspect.isdatadescriptor):
setattr(cls_cpp, name, fn)
+
def block_init(self):
+ # Prevent initialization of the support class
raise NotImplementedError(self.__class__.__name__ + '.__init__')
+
cls.__init__ = block_init
return cls
- return real_class_extend
+
+ return class_augment
def _single_page_pdf(page):
@@ -86,17 +92,15 @@ def _mudraw(buffer, fmt):
tmp_in.flush()
proc = run(
- ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
- stdout=PIPE, stderr=PIPE
+ ['mudraw', '-F', fmt, '-o', '-', tmp_in.name], stdout=PIPE, stderr=PIPE
)
if proc.stderr:
raise RuntimeError(proc.stderr.decode())
return proc.stdout
-@extends(Object)
+@augments(Object)
class Extend_Object:
-
def _repr_mimebundle_(self, **kwargs):
"""Present options to IPython for rich display of this object
@@ -127,13 +131,116 @@ class Extend_Object:
pass
return data
+ def emplace(self, other):
+ """Copy all items from other without making a new object.
-@extends(Pdf)
-class Extend_Pdf:
+ Particularly when working with pages, it may be desirable to remove all
+ of the existing page's contents and emplace (insert) a new page on top
+ of it, in a way that preserves all links and references to the original
+ page. (Or similarly, for other Dictionary objects in a PDF.)
- def _repr_mimebundle_(self, **kwargs):
+ When a page is assigned (``pdf.pages[0] = new_page``), only the
+ application knows if references to the original page are
+ still valid. For example, a PDF optimizer might restructure a page
+ object into another visually similar one, and references would be valid;
+ but for a program that reorganizes page contents such as an N-up
+ compositor, references may not be valid anymore.
+
+ This method takes precautions to ensure that child objects in common
+ with ``self`` and ``other`` are not inadvertently deleted.
+
+ Example:
+ >>> pdf.pages[0].objgen
+ (16, 0)
+ >>> pdf.pages[0].emplace(pdf.pages[1])
+ >>> pdf.pages[0].objgen
+ (16, 0) # Same object
+ """
+ del_keys = set(self.keys()) - set(other.keys())
+ for k in other.keys():
+ self[k] = other[k] # pylint: disable=unsupported-assignment-operation
+ for k in del_keys:
+ del self[k] # pylint: disable=unsupported-delete-operation
+
+ def write(self, data, *, filter=None, decode_parms=None, type_check=True):
+ """
+ Replace stream object's data with new (possibly compressed) `data`.
+
+ `filter` and `decode_parms` specify the compression that is present on
+ the input `data`.
+
+ When writing the PDF in :meth:`pikepdf.Pdf.save`,
+ pikepdf may change the compression or apply compression to data that was
+ not compressed, depending on the parameters given to that function. It
+ will never change lossless to lossy encoding.
+
+ PNG and TIFF images, even if compressed, cannot be directly inserted
+ into a PDF and displayed as images.
+
+ Args:
+ data (bytes): the new data to use for replacement
+ filter (pikepdf.Name or pikepdf.Array): The filter(s) with which the
+ data is (already) encoded
+ decode_parms (pikepdf.Dictionary or pikepdf.Array): Parameters for the
+ filters with which the object is encoded
+ type_check (bool): Check arguments; use False only if you want to
+ intentionally create malformed PDFs.
+
+ If only one `filter` is specified, it may be a name such as
+ `Name('/FlateDecode')`. If there are multiple filters, then an array
+ of names should be given.
+
+ If there is only one filter, `decode_parms` is a Dictionary of
+ parameters for that filter. If there are multiple filters, then
+ `decode_parms` is an Array of Dictionary, where each array index
+ corresponds to the filter.
"""
- Present options to IPython for rich display of this object
+
+ if type_check and filter is not None:
+ if isinstance(filter, list):
+ filter = Array(filter)
+ filter = filter.wrap_in_array()
+
+ if isinstance(decode_parms, list):
+ decode_parms = Array(decode_parms)
+ elif decode_parms is None:
+ decode_parms = Array([])
+ else:
+ decode_parms = decode_parms.wrap_in_array()
+
+ if not all(isinstance(item, Name) for item in filter):
+ raise TypeError(
+ "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
+ )
+ if not all(
+ (isinstance(item, Dictionary) or item is None) for item in decode_parms
+ ):
+ raise TypeError(
+ "decode_parms must be: pikepdf.Dictionary or "
+ "pikepdf.Array([pikepdf.Dictionary])"
+ )
+ if len(decode_parms) != 0:
+ if len(filter) != len(decode_parms):
+ raise ValueError(
+ (
+ "filter ({}) and decode_parms ({}) must be arrays of "
+ " same length"
+ ).format(repr(filter), repr(decode_parms))
+ )
+ if len(filter) == 1:
+ filter = filter[0]
+ if len(decode_parms) == 0:
+ decode_parms = None
+ elif len(decode_parms) == 1:
+ decode_parms = decode_parms[0]
+ self._write(data, filter=filter, decode_parms=decode_parms)
+
+
+@augments(Pdf)
+class Extend_Pdf:
+ def _repr_mimebundle_(self, **_kwargs):
+ """
+ Present options to IPython or Jupyter for rich display of this object
See https://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display
"""
@@ -145,16 +252,12 @@ class Extend_Pdf:
data = {'application/pdf': bio.read()}
return data
- def open_metadata(
- self,
- set_pikepdf_as_editor=True,
- update_docinfo=True
- ):
+ def open_metadata(self, set_pikepdf_as_editor=True, update_docinfo=True):
"""
Open the PDF's XMP metadata for editing
Recommended for use in a ``with`` block. Changes are committed to the
- PDF when the block exits.
+ PDF when the block exits. (The ``Pdf`` must still be open.)
Example:
>>> with pdf.open_metadata() as meta:
@@ -173,11 +276,128 @@ class Extend_Pdf:
pikepdf.models.PdfMetadata
"""
return PdfMetadata(
- self,
- pikepdf_mark=set_pikepdf_as_editor,
- sync_docinfo=update_docinfo
+ self, pikepdf_mark=set_pikepdf_as_editor, sync_docinfo=update_docinfo
+ )
+
+ def make_stream(self, data):
+ """
+ Create a new pikepdf.Stream object that is attached to this PDF.
+
+ Args:
+ data (bytes): Binary data for the stream object
+ """
+ return Stream(self, data)
+
+ def add_blank_page(self, *, page_size=(612, 792)):
+ """
+ Add a blank page to this PDF. If pages already exist, the page will be added to
+ the end. Pages may be reordered using ``Pdf.pages``.
+
+ The caller may add content to the page by modifying its objects after creating
+ it.
+
+ Args:
+ page_size (tuple): The size of the page in PDF units (1/72 inch or 0.35mm).
+ Default size is set to a US Letter 8.5" x 11" page.
+ """
+ for dim in page_size:
+ if not (3 <= dim <= 14400):
+ raise ValueError('Page size must be between 3 and 14400 PDF units')
+
+ page_dict = Dictionary(
+ Type=Name.Page,
+ MediaBox=Array([0, 0, page_size[0], page_size[1]]),
+ Contents=self.make_stream(b''),
+ Resources=Dictionary(),
+ )
+ page = self.make_indirect(page_dict)
+ self._add_page(page, first=False)
+ return page
+
+ def close(self):
+ """
+ Close a Pdf object and release resources acquired by pikepdf
+
+ If pikepdf opened the file handle it will close it (e.g. when opened with a file
+ path). If the caller opened the file for pikepdf, the caller must close the file.
+
+ pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
+ implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
+ case for :class:`pikepdf.Stream` but can be true for any object. Do not close
+ the `Pdf` object if you might still be accessing content from it.
+
+ When an ``Object`` is copied from one ``Pdf`` to another, the ``Object`` is copied into
+ the destination ``Pdf`` immediately, so after accessing all desired information
+ from the source ``Pdf`` it may be closed.
+
+ Caution:
+ Closing the ``Pdf`` is currently implemented by resetting it to an empty
+ sentinel. It is currently possible to edit the sentinel as if it were a live
+ object. This behavior should not be relied on and is subject to change.
+
+ """
+
+ EMPTY_PDF = (
+ b"%PDF-1.3\n"
+ b"1 0 obj\n"
+ b"<< /Type /Catalog /Pages 2 0 R >>\n"
+ b"endobj\n"
+ b"2 0 obj\n"
+ b"<< /Type /Pages /Kids [] /Count 0 >>\n"
+ b"endobj\n"
+ b"xref\n"
+ b"0 3\n"
+ b"0000000000 65535 f \n"
+ b"0000000009 00000 n \n"
+ b"0000000058 00000 n \n"
+ b"trailer << /Size 3 /Root 1 0 R >>\n"
+ b"startxref\n"
+ b"110\n"
+ b"%%EOF\n"
)
+ if self.filename:
+ description = "closed file: " + self.filename
+ else:
+ description = "closed object"
+ self._process(description, EMPTY_PDF)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+
+ @property
+ def allow(self):
+ """
+ Report permissions associated with this PDF.
+
+ By default these permissions will be replicated when the PDF is
+ saved. Permissions may also only be changed when a PDF is being saved,
+ and are only available for encrypted PDFs. If a PDF is not encrypted,
+ all operations are reported as allowed.
+
+ pikepdf has no way of enforcing permissions.
+
+ Returns: pikepdf.models.Permissions
+ """
+ results = {}
+ for field in Permissions.fields():
+ results[field] = getattr(self, '_allow_' + field)
+ return Permissions(**results)
+
+ @property
+ def encryption(self):
+ """
+ Report encryption information for this PDF.
+
+ Encryption settings may only be changed when a PDF is saved.
+
+ Returns: pikepdf.models.EncryptionInfo
+ """
+ return EncryptionInfo(self._encryption_data)
+
def _attach(self, *, basename, filebytes, mime=None, desc=''):
"""
Attach a file to this PDF
@@ -219,6 +439,7 @@ class Extend_Pdf:
if not mime:
from mimetypes import guess_type
+
mime, _encoding = guess_type(basename)
if not mime:
mime = 'application/octet-stream'
@@ -226,28 +447,28 @@ class Extend_Pdf:
filestream = Stream(self, filebytes)
filestream.Subtype = Name('/' + mime)
- filespec = Dictionary({
- '/Type': Name.Filespec,
- '/F': basename,
- '/UF': basename,
- '/Desc': desc,
- '/EF': Dictionary({
- '/F': filestream
- })
- })
+ filespec = Dictionary(
+ {
+ '/Type': Name.Filespec,
+ '/F': basename,
+ '/UF': basename,
+ '/Desc': desc,
+ '/EF': Dictionary({'/F': filestream}),
+ }
+ )
# names = self.Root.Names.EmbeddedFiles.Names.as_list()
# names.append(filename) # Key
# names.append(self.make_indirect(filespec))
- self.Root.Names.EmbeddedFiles.Names = Array([
- basename, # key
- self.make_indirect(filespec)
- ])
+ self.Root.Names.EmbeddedFiles.Names = Array(
+ [basename, self.make_indirect(filespec)] # key
+ )
if '/PageMode' not in self.Root:
self.Root.PageMode = Name.UseAttachments
-@extends(_ObjectMapping)
+
+@augments(_ObjectMapping)
class Extend_ObjectMapping:
def __contains__(self, key):
try: