summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/objects.rst30
-rw-r--r--src/pikepdf/_methods.py23
-rw-r--r--src/qpdf/qpdf.cpp4
-rw-r--r--tests/test_pdf.py7
4 files changed, 53 insertions, 11 deletions
diff --git a/docs/objects.rst b/docs/objects.rst
index c08f460..c20fcc4 100644
--- a/docs/objects.rst
+++ b/docs/objects.rst
@@ -18,9 +18,9 @@ value in a PDF is assigned to a Python ``float``, pikepdf will convert it to
``Decimal``.
Types that are not directly convertible to Python are represented as
-:class:`pikepdf.Object`, a compound object that offers a superset of methods,
-some work only if the underlying type is suitable. You can use the EAFP
-idiom or ``isinstance`` to determine the type more precisely. This partly
+:class:`pikepdf.Object`, a compound object that offers a superset of possible
+methods, some of which work only if the underlying type is suitable. Use the
+EAFP idiom or ``isinstance`` to determine the type more precisely. This partly
reflects the fact that the PDF specification allows many data fields to be
one of several types.
@@ -67,3 +67,27 @@ the appropriate pikepdf object when passed to pikepdf APIs – when possible.
However, pikepdf sends ``pikepdf.Object`` types back to Python on return calls,
in most cases, because pikepdf needs to keep track of objects that came from
PDFs originally.
+
+
+Object lifecycle and memory management
+======================================
+
+As mentioned above, a :class:`pikepdf.Object` may reference data that is lazily
+loaded from its source :class:`pikepdf.Pdf`. Closing the `Pdf` with
+:meth:`pikepdf.Pdf.close` will invalidate some objects, depending on whether
+or not the data was loaded, and other implementation details that may change.
+Generally speaking, a :class:`pikepdf.Pdf` should be held open until it is no
+longer needed, and objects that were derived from it may or may not be usable
+after it is closed.
+
+Simple objects (booleans, integers, decimals, ``None``) are copied directly
+to Python as pure Python objects.
+
+For PDF stream objects, use :meth:`pikepdf.Object.read_bytes()` to obtain a
+copy of the object as pure bytes data, if this information is required after
+closing a PDF.
+
+When objects are copied from one :class:`pikepdf.Pdf` to another, the
+underlying data is copied immediately into the target. As such, it is possible
+to merge hundreds of `Pdf` objects into one, keeping only a single source and
+the target file open at a time.
diff --git a/src/pikepdf/_methods.py b/src/pikepdf/_methods.py
index 55e9526..8d897c0 100644
--- a/src/pikepdf/_methods.py
+++ b/src/pikepdf/_methods.py
@@ -208,14 +208,25 @@ class Extend_Pdf:
def close(self):
"""
- Close a PDF object (probably)
+ Close a Pdf object and release resources acquired by pikepdf
- Strictly speaking this decrements the reference count of the open
- file object associated with this PDF. It is possible that other
- pikepdf objects also reference the same file object; if these exist,
- the file object **will remain open** until they are released.
+ If pikepdf opened the file handle, it will close it (e.g. when opened with a file
+ path). If the caller opened the file for pikepdf, the caller must close the file.
+
+ pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
+ implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
+ case for :class:`pikepdf.Stream` but can be true for any object. Do not close
+ the `Pdf` object if you might still be accessing content from it.
+
+ When an `Object` is copied from one `Pdf` to another, the `Object` is copied into
+ the destination `Pdf` immediately, so the source `Pdf` may be closed once all
+ desired information has been accessed from it.
+
+ Caution:
+ Closing the `Pdf` is currently implemented by resetting it to an empty
+ sentinel. It is currently possible to edit the sentinel as if it were a live
+ object. This behavior should not be relied on and is subject to change.
- The PDF is closed by resetting to an empty in-memory PDF.
"""
EMPTY_PDF = (
diff --git a/src/qpdf/qpdf.cpp b/src/qpdf/qpdf.cpp
index 78d7570..26d7396 100644
--- a/src/qpdf/qpdf.cpp
+++ b/src/qpdf/qpdf.cpp
@@ -259,8 +259,8 @@ void init_qpdf(py::module &m)
will be accessed as a readable binary stream. pikepdf will read the
entire stream into a private buffer.
- If `.open()` is used in a `with` block, pikepdf will attempt to close
- the underlying file object.
+ `.open()` may be used in a `with`-block; `.close()` will be called when
+ the block exits.
Args:
filename_or_stream (os.PathLike): Filename of PDF to open
diff --git a/tests/test_pdf.py b/tests/test_pdf.py
index 9c4ec60..d9d7422 100644
--- a/tests/test_pdf.py
+++ b/tests/test_pdf.py
@@ -224,3 +224,10 @@ def test_with_block(resources):
with pikepdf.open(resources / 'pal-1bit-trivial.pdf') as pdf:
desc = pdf.filename
assert pdf.filename != desc
+
+
+def test_with_block_abuse(resources):
+ with pikepdf.open(resources / 'pal-1bit-trivial.pdf') as pdf:
+ im0 = pdf.pages[0].Resources.XObject['/Im0']
+ with pytest.raises(PdfError):
+ im0.read_bytes()