summaryrefslogtreecommitdiff
path: root/src/qpdf
diff options
context:
space:
mode:
Diffstat (limited to 'src/qpdf')
-rw-r--r--src/qpdf/annotation.cpp52
-rw-r--r--src/qpdf/object.cpp102
-rw-r--r--src/qpdf/pikepdf.cpp98
-rw-r--r--src/qpdf/pikepdf.h30
-rw-r--r--src/qpdf/qpdf.cpp585
-rw-r--r--src/qpdf/qpdf_inputsource.h2
-rw-r--r--src/qpdf/qpdf_pagelist.cpp87
-rw-r--r--src/qpdf/qpdf_pipeline.h77
8 files changed, 814 insertions, 219 deletions
diff --git a/src/qpdf/annotation.cpp b/src/qpdf/annotation.cpp
new file mode 100644
index 0000000..f82ebdf
--- /dev/null
+++ b/src/qpdf/annotation.cpp
@@ -0,0 +1,52 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2019, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+
+
+#include <qpdf/Constants.h>
+#include <qpdf/Types.h>
+#include <qpdf/DLL.h>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/QPDFAnnotationObjectHelper.hh>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "pikepdf.h"
+
+
+/* Register the ``Annotation`` Python class, a wrapper over QPDFAnnotationObjectHelper. */
+void init_annotation(py::module &m)
+{
+    // Fetch an appearance stream; `which` is passed as a PDF object and reduced
+    // to its name string. The Python-side default for `state` is supplied by
+    // py::arg at registration time.
+    // NOTE(review): `which` is not verified to be a Name before getName() is called.
+    auto appearance_stream = [](QPDFAnnotationObjectHelper& helper, QPDFObjectHandle& which, std::string const& state) {
+        return helper.getAppearanceStream(which.getName(), state);
+    };
+
+    // Build the page content that draws this annotation's appearance; `name`
+    // is likewise reduced to its name string before being forwarded.
+    auto page_content_for_appearance = [](QPDFAnnotationObjectHelper& helper, QPDFObjectHandle& name, int rotate, int required_flags, int forbidden_flags) {
+        return helper.getPageContentForAppearance(name.getName(), rotate, required_flags, forbidden_flags);
+    };
+
+    py::class_<QPDFAnnotationObjectHelper> annotation(m, "Annotation");
+
+    annotation.def(py::init<QPDFObjectHandle &>(), py::keep_alive<0, 1>());
+
+    // Read-only views straight onto the helper's accessors
+    annotation.def_property_readonly("subtype", &QPDFAnnotationObjectHelper::getSubtype);
+    annotation.def_property_readonly("flags", &QPDFAnnotationObjectHelper::getFlags);
+    annotation.def_property_readonly("appearance_state", &QPDFAnnotationObjectHelper::getAppearanceState);
+    annotation.def_property_readonly("appearance_dict", &QPDFAnnotationObjectHelper::getAppearanceDictionary);
+
+    annotation.def("get_appearance_stream",
+        appearance_stream,
+        py::arg("which"),
+        py::arg("state") = ""
+    );
+
+    // By default no flags are required and invisible/hidden annotations are excluded
+    annotation.def("get_page_content_for_appearance",
+        page_content_for_appearance,
+        py::arg("name"),
+        py::arg("rotate"),
+        py::arg("required_flags") = 0,
+        py::arg("forbidden_flags") = an_invisible | an_hidden
+    );
+}
diff --git a/src/qpdf/object.cpp b/src/qpdf/object.cpp
index 392d9ff..1270961 100644
--- a/src/qpdf/object.cpp
+++ b/src/qpdf/object.cpp
@@ -363,7 +363,8 @@ void init_object(py::module& m)
[](QPDFObjectHandle &h, std::shared_ptr<QPDF> possible_owner) {
return (h.getOwningQPDF() == possible_owner.get());
},
- "Test if this object is owned by the indicated *possible_owner*."
+ "Test if this object is owned by the indicated *possible_owner*.",
+ py::arg("possible_owner")
)
.def_property_readonly("is_indirect", &QPDFObjectHandle::isIndirect)
.def("__repr__", &objecthandle_repr)
@@ -555,9 +556,9 @@ void init_object(py::module& m)
}
return py::cast(value);
},
- "for dictionary objects, behave as dict.get(key, default=None)",
+ "For ``pikepdf.Dictionary`` objects, behave as ``dict.get(key, default=None)``",
py::arg("key"),
- py::arg("default_") = py::none(),
+ py::arg("default") = py::none(),
py::return_value_policy::reference_internal
)
.def("get",
@@ -570,9 +571,9 @@ void init_object(py::module& m)
}
return py::cast(value);
},
- "for dictionary objects, behave as dict.get(key, default=None)",
+ "For ``pikepdf.Dictionary`` objects, behave as ``dict.get(key, default=None)``",
py::arg("key"),
- py::arg("default_") = py::none(),
+ py::arg("default") = py::none(),
py::return_value_policy::reference_internal
)
.def("keys", &QPDFObjectHandle::getKeys)
@@ -664,6 +665,12 @@ void init_object(py::module& m)
h.eraseItem(u_index);
}
)
+ .def("wrap_in_array",
+ [](QPDFObjectHandle &h) {
+ return h.wrapInArray();
+ },
+ "Return the object wrapped in an array if not already an array."
+ )
.def("get_stream_buffer",
[](QPDFObjectHandle &h) {
PointerHolder<Buffer> phbuf = h.getStreamData();
@@ -694,37 +701,17 @@ void init_object(py::module& m)
},
"Read the content stream associated with this object without decoding"
)
- .def("write",
- [](QPDFObjectHandle &h, py::bytes data, py::args args, py::kwargs kwargs) {
+ .def("_write",
+ [](QPDFObjectHandle &h, py::bytes data, py::object filter, py::object decode_parms) {
std::string sdata = data;
- QPDFObjectHandle filter = QPDFObjectHandle::newNull();
- QPDFObjectHandle decode_parms = QPDFObjectHandle::newNull();
- if (args.size() != 0)
- throw py::value_error("Too many positional arguments");
- if (kwargs.contains("filter"))
- filter = objecthandle_encode(kwargs["filter"]);
- if (kwargs.contains("decode_parms"))
- decode_parms = objecthandle_encode(kwargs["decode_parms"]);
- h.replaceStreamData(sdata, filter, decode_parms);
+ QPDFObjectHandle h_filter = objecthandle_encode(filter);
+ QPDFObjectHandle h_decode_parms = objecthandle_encode(decode_parms);
+ h.replaceStreamData(sdata, h_filter, h_decode_parms);
},
R"~~~(
- Replace the content stream with `data`, compressed according to `filter` and `decode_parms`
-
- :param data: the new data to use for replacement
- :type data: bytes
- :param filter: The filter(s) with which the data is (already) encoded
- :param decode_parms: Parameters for the filters with which the object is encode
-
- If only one `filter` is specified, it may be a name such as
- `Name('/FlateDecode')`. If there are multiple filters, then array
- of names should be given.
-
- If there is only one filter, `decode_parms` is a Dictionary of
- parameters for that filter. If there are multiple filters, then
- `decode_parms` is an Array of Dictionary, where each array index
- is corresponds to the filter.
-
- )~~~"
+ Low level write/replace stream data without argument checking. Use .write().
+ )~~~",
+ py::arg("data"), py::arg("filter"), py::arg("decode_parms")
)
.def_property_readonly("images",
[](QPDFObjectHandle &h) {
@@ -749,7 +736,16 @@ void init_object(py::module& m)
py::arg("prepend") = false,
py::keep_alive<1, 2>()
)
- .def("page_contents_coalesce", &QPDFObjectHandle::coalesceContentStreams)
+ .def("page_contents_coalesce", &QPDFObjectHandle::coalesceContentStreams,
+ R"~~~(
+ Coalesce an array of page content streams into a single content stream.
+
+ The PDF specification allows the ``/Contents`` object to contain either
+ an array of content streams or a single content stream. However, it
+ simplifies parsing and editing if there is only a single content stream.
+ This function merges all content streams.
+ )~~~"
+ )
.def_property_readonly("_objgen",
&object_get_objgen
)
@@ -811,6 +807,41 @@ void init_object(py::module& m)
py::arg("resolved") = false,
"Convert PDF objects into their binary representation, optionally resolving indirect objects."
)
+ .def("to_json",
+ [](QPDFObjectHandle &h, bool dereference = false) -> py::bytes {
+ return h.getJSON(dereference).unparse();
+ },
+ py::arg("dereference") = false,
+ R"~~~(
+ Convert to a QPDF JSON representation of the object.
+
+ See the QPDF manual for a description of its JSON representation.
+ http://qpdf.sourceforge.net/files/qpdf-manual.html#ref.json
+
+ Not necessarily compatible with other PDF-JSON representations that
+ exist in the wild.
+
+ * Names are encoded as UTF-8 strings
+ * Indirect references are encoded as strings containing ``obj gen R``
+ * Strings are encoded as UTF-8 strings with unrepresentable binary
+ characters encoded as ``\uHHHH``
+ * Encoding streams just encodes the stream's dictionary; the stream
+ data is not represented
+ * Object types that are only valid in content streams (inline
+ image, operator) as well as "reserved" objects are not
+ representable and will be serialized as ``null``.
+
+ Args:
+ dereference (bool): If True, deference the object is this is an
+ indirect object.
+
+ Returns:
+ bytes: JSON bytestring of object. The object is UTF-8 encoded
+ and may be decoded to a Python str that represents the binary
+ values ``\x00-\xFF`` as ``U+0000`` to ``U+00FF``; that is,
+ it may contain mojibake.
+ )~~~"
+ )
; // end of QPDFObjectHandle bindings
m.def("_new_boolean", &QPDFObjectHandle::newBool, "Construct a PDF Boolean object");
@@ -900,7 +931,8 @@ void init_object(py::module& m)
[](const std::string& op) {
return QPDFObjectHandle::newOperator(op);
},
- "Construct a PDF Operator object for use in content streams"
+ "Construct a PDF Operator object for use in content streams.",
+ py::arg("op")
);
m.def("_Null", &QPDFObjectHandle::newNull,
"Construct a PDF Null object"
diff --git a/src/qpdf/pikepdf.cpp b/src/qpdf/pikepdf.cpp
new file mode 100644
index 0000000..2daa69a
--- /dev/null
+++ b/src/qpdf/pikepdf.cpp
@@ -0,0 +1,98 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2019, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+#include <sstream>
+#include <type_traits>
+#include <cerrno>
+#include <cstring>
+
+#include "pikepdf.h"
+
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/QPDFSystemError.hh>
+#include <qpdf/QUtil.hh>
+
+#include <pybind11/stl.h>
+#include <pybind11/iostream.h>
+#include <pybind11/buffer_info.h>
+
+#include "qpdf_pagelist.h"
+#include "utils.h"
+
+
+extern "C" const char* qpdf_get_qpdf_version();
+
+
+/*
+ * Scope guard for errno: the constructor remembers the current errno and
+ * replaces it with `val`; the destructor puts the remembered value back.
+ */
+class TemporaryErrnoChange {
+public:
+    // explicit: an int must never silently convert into an errno guard
+    explicit TemporaryErrnoChange(int val) : stored(errno) {
+        errno = val;
+    }
+    ~TemporaryErrnoChange() {
+        errno = stored;
+    }
+    // Non-copyable (and therefore non-movable): a copy would restore the
+    // saved errno a second time, at an unrelated point in the program.
+    TemporaryErrnoChange(const TemporaryErrnoChange&) = delete;
+    TemporaryErrnoChange& operator=(const TemporaryErrnoChange&) = delete;
+private:
+    int stored;  // errno as it was when the guard was constructed
+};
+
+
+PYBIND11_MODULE(_qpdf, m) {
+ //py::options options;
+ //options.disable_function_signatures();
+ // Entry point of the _qpdf extension module: sets the module docstring, then registers all bindings.
+ m.doc() = "pikepdf provides a Pythonic interface for QPDF";
+ // qpdf_get_qpdf_version is the extern "C" symbol declared earlier in this file.
+ m.def("qpdf_version", &qpdf_get_qpdf_version, "Get libqpdf version");
+ // Bindings implemented in the other translation units are registered in this order.
+ init_qpdf(m);
+ init_pagelist(m);
+ init_object(m);
+ init_annotation(m);
+ // Text helpers built on QUtil: UTF-8 <-> PDFDocEncoding.
+ m.def("utf8_to_pdf_doc",
+ [](py::str utf8, char unknown) {
+ std::string pdfdoc;
+ bool success = QUtil::utf8_to_pdf_doc(std::string(utf8), pdfdoc, unknown);
+ return py::make_tuple(success, py::bytes(pdfdoc)); // (success flag, converted bytes)
+ }
+ );
+ m.def("pdf_doc_to_utf8",
+ [](py::bytes pdfdoc) -> py::str {
+ return py::str(QUtil::pdf_doc_to_utf8(pdfdoc));
+ }
+ );
+ // Exception mapping: QPDFExc becomes PasswordError or PdfError; QPDFSystemError becomes OSError or PdfError.
+ static py::exception<QPDFExc> exc_main(m, "PdfError");
+ static py::exception<QPDFExc> exc_password(m, "PasswordError");
+ py::register_exception_translator([](std::exception_ptr p) {
+ try {
+ if (p) std::rethrow_exception(p);
+ } catch (const QPDFExc &e) {
+ if (e.getErrorCode() == qpdf_e_password) { // password failures get their own Python exception type
+ exc_password(e.what());
+ } else {
+ exc_main(e.what());
+ }
+ } catch (const QPDFSystemError &e) {
+ if (e.getErrno() != 0) {
+ TemporaryErrnoChange errno_holder(e.getErrno()); // errno is put back when errno_holder leaves scope
+ PyErr_SetFromErrnoWithFilename(PyExc_OSError, e.getDescription().c_str()); // builds the OSError from the errno just installed
+ } else {
+ exc_main(e.what()); // no errno recorded: report as a generic PdfError
+ }
+ }
+ });
+ // Exception types not handled above are not translated here.
+ // __version__ is taken from the VERSION_INFO macro when it is defined, otherwise "dev".
+#ifdef VERSION_INFO
+ m.attr("__version__") = VERSION_INFO;
+#else
+ m.attr("__version__") = "dev";
+#endif
+}
diff --git a/src/qpdf/pikepdf.h b/src/qpdf/pikepdf.h
index 7fbd6e8..0acd807 100644
--- a/src/qpdf/pikepdf.h
+++ b/src/qpdf/pikepdf.h
@@ -40,9 +40,6 @@ namespace pybind11 { namespace detail {
};
}}
-#define CUSTOM_TYPE_CONVERSION 1
-#if CUSTOM_TYPE_CONVERSION
-
// From object_convert.cpp
pybind11::object decimal_from_pdfobject(QPDFObjectHandle h);
@@ -57,24 +54,9 @@ namespace pybind11 { namespace detail {
* Conversion part 1 (Python->C++): convert a PyObject into a Object
*/
bool load(handle src, bool convert) {
- // if (src.is_none()) {
- // if (!convert) return false;
- // value = QPDFObjectHandle::newNull();
- // return true;
- // }
- // Attempting to construct these does not work...
- // if (convert) {
- // if (PYBIND11_LONG_CHECK(src.ptr())) {
- // auto as_int = src.cast<long long>();
- // value = QPDFObjectHandle::newInteger(as_int);
- // } /*else if (PyFloat_Check(src.ptr())) {
- // auto as_double = src.cast<double>();
- // value = QPDFObjectHandle::newReal(as_double);
- // } */ else {
- // return base::load(src, convert);
- // }
- // return true;
- // }
+ // Do whatever our base does
+ // Potentially we could convert some scalars to QPDFObjectHandle here,
+ // but most of the interfaces just expect straight C++ types.
return base::load(src, convert);
}
@@ -157,7 +139,6 @@ namespace pybind11 { namespace detail {
}
};
}} // namespace pybind11::detail
-#endif
namespace py = pybind11;
@@ -166,6 +147,8 @@ PYBIND11_MAKE_OPAQUE(std::vector<QPDFObjectHandle>);
typedef std::map<std::string, QPDFObjectHandle> ObjectMap;
PYBIND11_MAKE_OPAQUE(ObjectMap);
+// From qpdf.cpp
+void init_qpdf(py::module& m);
// From object.cpp
size_t list_range_check(QPDFObjectHandle h, int index);
@@ -183,6 +166,9 @@ QPDFObjectHandle objecthandle_encode(const py::handle handle);
std::vector<QPDFObjectHandle> array_builder(const py::iterable iter);
std::map<std::string, QPDFObjectHandle> dict_builder(const py::dict dict);
+// From annotation.cpp
+void init_annotation(py::module &m);
+
// Support for recursion checks
class StackGuard
{
diff --git a/src/qpdf/qpdf.cpp b/src/qpdf/qpdf.cpp
index 5bb8ea9..0a5fc26 100644
--- a/src/qpdf/qpdf.cpp
+++ b/src/qpdf/qpdf.cpp
@@ -28,10 +28,9 @@
#include "qpdf_pagelist.h"
#include "qpdf_inputsource.h"
+#include "qpdf_pipeline.h"
#include "utils.h"
-extern "C" const char* qpdf_get_qpdf_version();
-
void check_stream_is_usable(py::object stream)
{
@@ -58,6 +57,7 @@ open_pdf(
q->setPasswordIsHexKey(hex_password);
q->setIgnoreXRefStreams(ignore_xref_streams);
q->setAttemptRecovery(attempt_recovery);
+ q->setImmediateCopyFrom(true);
if (py::hasattr(filename_or_stream, "read") && py::hasattr(filename_or_stream, "seek")) {
// Python code gave us an object with a stream interface
@@ -66,7 +66,7 @@ open_pdf(
check_stream_is_usable(stream);
// The PythonInputSource object will be owned by q
- InputSource* input_source = new PythonInputSource(stream);
+ auto input_source = PointerHolder<InputSource>(new PythonInputSource(stream));
py::gil_scoped_release release;
q->processInputSource(input_source, password.c_str());
} else {
@@ -80,7 +80,7 @@ open_pdf(
q->processFile(
description.c_str(),
file, // transferring ownership
- true, // QPDF will close the file
+ true, // QPDF will close the file (including if there are exceptions)
password.c_str()
);
file = nullptr; // QPDF owns the file and will close it
@@ -116,122 +116,320 @@ private:
};
+/*
+ * Forward to the Python helper pikepdf._cpphelpers.update_xmp_pdfversion,
+ * passing the Pdf object and the version string.
+ */
+void update_xmp_pdfversion(QPDF &q, std::string version)
+{
+    py::module helpers = py::module::import("pikepdf._cpphelpers");
+    // Wrap the C++ QPDF as a Python object before the helper is looked up and called
+    py::object wrapped_pdf = py::cast(q);
+    helpers.attr("update_xmp_pdfversion")(wrapped_pdf, version);
+}
+
+
+/*
+ * Copy the optional password stored under `key` in `encryption` into `password`.
+ *
+ * Does nothing when the key is absent. For encryption levels R <= 4 the UTF-8
+ * value is transcoded with QUtil::utf8_to_pdf_doc, and py::value_error is thrown
+ * when that conversion reports failure. For higher levels the UTF-8 string is
+ * stored unchanged.
+ */
+static void read_encryption_password(
+    const py::object &encryption,
+    const char *key,
+    int encryption_level,
+    std::string &password
+)
+{
+    if (!encryption.contains(key))
+        return;
+
+    auto utf8 = encryption[key].cast<std::string>();
+    if (encryption_level <= 4) {
+        auto success = QUtil::utf8_to_pdf_doc(utf8, password);
+        if (!success)
+            throw py::value_error("Encryption level is R3/R4 and password is not encodable as PDFDocEncoding");
+    } else {
+        password = utf8;
+    }
+}
+
+
+/*
+ * Configure `w` to encrypt its output according to the mapping `encryption`.
+ *
+ * Keys read from `encryption` (all optional):
+ *   "R"        - int encryption level, default 6; values outside 2..6 are rejected
+ *   "owner"    - owner password, written to `owner`
+ *   "user"     - user password, written to `user`
+ *   "allow"    - object with boolean permission attributes (accessibility,
+ *                extract, modify_*, print_lowres, print_highres)
+ *   "aes"      - bool, defaults to (R >= 4)
+ *   "metadata" - bool, defaults to (R >= 4)
+ *
+ * `owner` and `user` are caller-provided strings; their c_str() pointers are
+ * what gets handed to the writer.
+ *
+ * Throws py::type_error for wrongly typed "R"/"aes"/"metadata" and
+ * py::value_error for invalid levels, unencodable passwords, or inconsistent
+ * aes/metadata combinations.
+ */
+void setup_encryption(
+    QPDFWriter &w,
+    py::object encryption,
+    std::string &owner,
+    std::string &user
+)
+{
+    bool aes = true;
+    bool metadata = true;
+    std::map<std::string, bool> allow;
+    int encryption_level = 6;
+
+    if (encryption.contains("R")) {
+        if (!py::isinstance<py::int_>(encryption["R"]))
+            throw py::type_error("Encryption level 'R' must be an integer");
+        encryption_level = py::int_(encryption["R"]);
+    }
+    if (encryption_level < 2 || encryption_level > 6)
+        throw py::value_error("Invalid encryption level: must be 2, 3, 4 or 6");
+
+    // R=5 is accepted but reported through Python's warnings module
+    if (encryption_level == 5) {
+        auto warn = py::module::import("warnings").attr("warn");
+        warn("Encryption R=5 is deprecated");
+    }
+
+    read_encryption_password(encryption, "owner", encryption_level, owner);
+    read_encryption_password(encryption, "user", encryption_level, user);
+
+    // When "allow" is absent the map stays empty, and every allow[...] lookup
+    // below default-inserts false, i.e. every permission is denied.
+    if (encryption.contains("allow")) {
+        auto pyallow = encryption["allow"];
+        allow["accessibility"] = pyallow.attr("accessibility").cast<bool>();
+        allow["extract"] = pyallow.attr("extract").cast<bool>();
+        allow["modify_assembly"] = pyallow.attr("modify_assembly").cast<bool>();
+        allow["modify_annotation"] = pyallow.attr("modify_annotation").cast<bool>();
+        allow["modify_form"] = pyallow.attr("modify_form").cast<bool>();
+        allow["modify_other"] = pyallow.attr("modify_other").cast<bool>();
+        allow["print_lowres"] = pyallow.attr("print_lowres").cast<bool>();
+        allow["print_highres"] = pyallow.attr("print_highres").cast<bool>();
+    }
+    if (encryption.contains("aes")) {
+        if (py::isinstance<py::bool_>(encryption["aes"]))
+            aes = py::bool_(encryption["aes"]);
+        else
+            throw py::type_error("aes must be bool");
+    } else {
+        aes = (encryption_level >= 4);
+    }
+    if (encryption.contains("metadata")) {
+        if (py::isinstance<py::bool_>(encryption["metadata"]))
+            metadata = py::bool_(encryption["metadata"]);
+        else
+            throw py::type_error("metadata must be bool");
+    } else {
+        metadata = (encryption_level >= 4);
+    }
+
+    // Reject combinations the selected level cannot express
+    if (metadata && encryption_level < 4) {
+        throw py::value_error("Cannot encrypt metadata when R < 4");
+    }
+    if (aes && encryption_level < 4) {
+        throw py::value_error("Cannot encrypt with AES when R < 4");
+    }
+    if (encryption_level == 6 && !aes) {
+        throw py::value_error("When R = 6, AES encryption must be enabled");
+    }
+    if (metadata && !aes) {
+        throw py::value_error("Cannot encrypt metadata unless AES encryption is enabled");
+    }
+
+    // High-resolution printing takes precedence over low-resolution
+    qpdf_r3_print_e print;
+    if (allow["print_highres"])
+        print = qpdf_r3p_full;
+    else if (allow["print_lowres"])
+        print = qpdf_r3p_low;
+    else
+        print = qpdf_r3p_none;
+
+    if (encryption_level == 6) {
+        w.setR6EncryptionParameters(
+            user.c_str(), owner.c_str(),
+            allow["accessibility"],
+            allow["extract"],
+            allow["modify_assembly"],
+            allow["modify_annotation"],
+            allow["modify_form"],
+            allow["modify_other"],
+            print,
+            metadata
+        );
+    } else if (encryption_level == 5) {
+        // The deprecation warning for R=5 has already been issued above
+        w.setR5EncryptionParameters(
+            user.c_str(), owner.c_str(),
+            allow["accessibility"],
+            allow["extract"],
+            allow["modify_assembly"],
+            allow["modify_annotation"],
+            allow["modify_form"],
+            allow["modify_other"],
+            print,
+            metadata
+        );
+    } else if (encryption_level == 4) {
+        w.setR4EncryptionParameters(
+            user.c_str(), owner.c_str(),
+            allow["accessibility"],
+            allow["extract"],
+            allow["modify_assembly"],
+            allow["modify_annotation"],
+            allow["modify_form"],
+            allow["modify_other"],
+            print,
+            metadata,
+            aes
+        );
+    } else if (encryption_level == 3) {
+        w.setR3EncryptionParameters(
+            user.c_str(), owner.c_str(),
+            allow["accessibility"],
+            allow["extract"],
+            allow["modify_assembly"],
+            allow["modify_annotation"],
+            allow["modify_form"],
+            allow["modify_other"],
+            print
+        );
+    } else if (encryption_level == 2) {
+        // R2 takes a single print flag rather than a print level
+        w.setR2EncryptionParameters(
+            user.c_str(), owner.c_str(),
+            (print != qpdf_r3p_none),
+            allow["modify_assembly"],
+            allow["extract"],
+            allow["modify_annotation"]
+        );
+    }
+}
+
+
+typedef std::pair<std::string, int> pdf_version_extension;
+
+/*
+ * Interpret `ver_ext` as a PDF version specification.
+ *
+ * Accepts either a string (extension level taken as 0) or a (str, int) pair
+ * of version and extension level. Throws py::type_error if the object casts
+ * to neither.
+ */
+pdf_version_extension get_version_extension(py::object ver_ext)
+{
+    std::string version = "";
+    int extension = 0;
+    try {
+        // Plain string form: the extension level keeps its default of 0
+        version = ver_ext.cast<std::string>();
+    } catch (const py::cast_error&) {
+        // Not a string; fall back to the (version, extension) pair form
+        try {
+            auto version_ext = ver_ext.cast<pdf_version_extension>();
+            version = version_ext.first;
+            extension = version_ext.second;
+        } catch (const py::cast_error&) {
+            throw py::type_error("PDF version must be a tuple: (str, int)");
+        }
+    }
+    return pdf_version_extension(version, extension);
+}
+
+
+/*
+ * Helper class to ensure streams we open get closed by destructor.
+ *
+ * Holds None until set() is given an object; on destruction, calls that
+ * object's close() method. Non-copyable and non-movable so close() is
+ * attempted exactly once.
+ */
+class Closer
+{
+public:
+    Closer() : monitored(py::none()) {}
+    ~Closer() {
+        if (this->monitored.is_none())
+            return;
+        try {
+            this->monitored.attr("close")();
+        } catch (py::error_already_set &e) {
+            // Destructors are implicitly noexcept, so letting a Python
+            // exception escape here would call std::terminate. Hand the
+            // error back to the interpreter and report it as unraisable.
+            e.restore();
+            PyErr_WriteUnraisable(this->monitored.ptr());
+        }
+    }
+    // Start monitoring `monitored`; it must provide a close() method.
+    void set(py::object monitored) {
+        this->monitored = monitored;
+    }
+    Closer(const Closer& other) = delete;
+    Closer(Closer&& other) = delete;
+    Closer& operator= (const Closer& other) = delete;
+    Closer& operator= (Closer&& other) = delete;
+
+private:
+    py::object monitored;  // None while nothing is being monitored
+};
+
void save_pdf(
QPDF& q,
py::object filename_or_stream,
bool static_id=false,
bool preserve_pdfa=true,
- std::string min_version="",
- std::string force_version="",
+ py::object min_version=py::none(),
+ py::object force_version=py::none(),
+ bool fix_metadata_version=true,
bool compress_streams=true,
- qpdf_stream_decode_level_e stream_decode_level=qpdf_dl_generalized,
+ py::object stream_decode_level=py::none(),
qpdf_object_stream_e object_stream_mode=qpdf_o_preserve,
bool normalize_content=false,
bool linearize=false,
bool qdf=false,
- py::object progress=py::none())
+ py::object progress=py::none(),
+ py::object encryption=py::none())
{
+ std::string owner;
+ std::string user;
+ std::string description;
QPDFWriter w(q);
- // Parameters
if (static_id) {
w.setStaticID(true);
}
w.setNewlineBeforeEndstream(preserve_pdfa);
- if (!min_version.empty()) {
- w.setMinimumPDFVersion(min_version, 0);
- }
- if (!force_version.empty()) {
- w.forcePDFVersion(force_version, 0);
+
+ if (!min_version.is_none()) {
+ auto version_ext = get_version_extension(min_version);
+ w.setMinimumPDFVersion(version_ext.first, version_ext.second);
}
w.setCompressStreams(compress_streams);
- w.setDecodeLevel(stream_decode_level);
+ if (!stream_decode_level.is_none()) {
+ // Unconditionally calling setDecodeLevel has side effects, disabling
+ // preserve encryption in particular
+ w.setDecodeLevel(stream_decode_level.cast<qpdf_stream_decode_level_e>());
+ }
w.setObjectStreamMode(object_stream_mode);
- if (normalize_content && linearize) {
- throw py::value_error("cannot save with both normalize_content and linearize");
- }
- w.setContentNormalization(normalize_content);
- w.setLinearization(linearize);
- w.setQDFMode(qdf);
-
- if (!progress.is_none()) {
- auto reporter = PointerHolder<QPDFWriter::ProgressReporter>(new PikeProgressReporter(progress));
- w.registerProgressReporter(reporter);
- }
+ py::object stream;
+ Closer stream_closer;
if (py::hasattr(filename_or_stream, "write") && py::hasattr(filename_or_stream, "seek")) {
// Python code gave us an object with a stream interface
- py::object stream = filename_or_stream;
+ stream = filename_or_stream;
check_stream_is_usable(stream);
+ description = py::repr(stream);
+ } else {
+ py::object filename = fspath(filename_or_stream);
+ py::object ospath = py::module::import("os").attr("path");
+ py::object samefile = ospath.attr("samefile");
+ py::object exists = ospath.attr("exists");
+ if (exists(filename).cast<bool>() && samefile(filename, q.getFilename()).cast<bool>()) {
+ throw py::value_error("Cannot overwrite input file");
+ }
+ stream = py::module::import("io").attr("open")(filename, "wb");
+ stream_closer.set(stream);
+ description = py::str(filename);
+ }
- // TODO might be able to improve this by streaming rather than buffering
- // using subclass of Pipeline that routes calls to Python.
- w.setOutputMemory();
+ // We must set up the output pipeline before we configure encryption
+ Pl_PythonOutput output_pipe(description.c_str(), stream);
+ w.setOutputPipeline(&output_pipe);
- // It would be kind to release the GIL here, but this is not possible if
- // another thread has an object and tries to mess with it. Correctness
- // is more important than performance.
- w.write();
+ if (encryption.is(py::bool_(true)) && !q.isEncrypted()) {
+ throw py::value_error("can't perserve encryption parameters on a file with no encryption");
+ }
- // But now that we've held the GIL forever, we can release it and take
- // it back again; at least in theory giving other threads a chance to
- // to do something.
- {
- py::gil_scoped_release release;
- }
+ if (
+ (encryption.is(py::bool_(true)) || py::isinstance<py::dict>(encryption))
+ && (normalize_content || !stream_decode_level.is_none())
+ ) {
+ throw py::value_error("cannot save with encryption and normalize_content or stream_decode_level");
+ }
- // getBuffer returns Buffer* and qpdf says we are responsible for
- // deleting it, so capture it in a unique_ptr
- std::unique_ptr<Buffer> output_buffer(w.getBuffer());
-
- // Create a memoryview of the buffer that libqpdf created
- // Awkward API alert:
- // QPDFWriter::getBuffer -> Buffer* (caller frees memory)
- // and Buffer::getBuffer -> unsigned char* (caller does not own memory)
- py::buffer_info output_buffer_info(
- output_buffer->getBuffer(),
- output_buffer->getSize());
- py::memoryview view_output_buffer(output_buffer_info);
-
- // Send it to the stream object (probably copying)
- stream.attr("write")(view_output_buffer);
+ if (encryption.is(py::bool_(true))) {
+ w.setPreserveEncryption(true); // Keep existing encryption
+ } else if (encryption.is_none() || encryption.is(py::bool_(false))) {
+ w.setPreserveEncryption(false); // Remove encryption
} else {
- py::object filename = filename_or_stream;
- std::string description = py::str(filename);
- // Delete the intended filename, in case it is the same as the input file.
- // This ensures that the input file will continue to exist in memory on Linux.
- portable_unlink(filename);
- FILE* file = portable_fopen(filename, "wb");
- w.setOutputFile(description.c_str(), file, true);
- w.write();
- file = nullptr; // QPDF will close it
+ setup_encryption(w, encryption, owner, user);
}
-}
+ if (normalize_content && linearize) {
+ throw py::value_error("cannot save with both normalize_content and linearize");
+ }
+ w.setContentNormalization(normalize_content);
+ w.setLinearization(linearize);
+ w.setQDFMode(qdf);
-PYBIND11_MODULE(_qpdf, m) {
- //py::options options;
- //options.disable_function_signatures();
+ if (!force_version.is_none()) {
+ auto version_ext = get_version_extension(force_version);
+ w.forcePDFVersion(version_ext.first, version_ext.second);
+ }
+ if (fix_metadata_version) {
+ update_xmp_pdfversion(q, w.getFinalVersion());
+ }
- m.doc() = "pikepdf provides a Pythonic interface for QPDF";
+ if (!progress.is_none()) {
+ auto reporter = PointerHolder<QPDFWriter::ProgressReporter>(new PikeProgressReporter(progress));
+ w.registerProgressReporter(reporter);
+ }
- m.def("qpdf_version", &qpdf_get_qpdf_version, "Get libqpdf version");
+ w.write();
+}
- static py::exception<QPDFExc> exc_main(m, "PdfError");
- static py::exception<QPDFExc> exc_password(m, "PasswordError");
- py::register_exception_translator([](std::exception_ptr p) {
- try {
- if (p) std::rethrow_exception(p);
- } catch (const QPDFExc &e) {
- if (e.getErrorCode() == qpdf_e_password) {
- exc_password(e.what());
- } else {
- exc_main(e.what());
- }
- }
- });
+void init_qpdf(py::module &m)
+{
py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
.value("disable", qpdf_object_stream_e::qpdf_o_disable)
.value("preserve", qpdf_object_stream_e::qpdf_o_preserve)
@@ -243,7 +441,12 @@ PYBIND11_MODULE(_qpdf, m) {
.value("specialized", qpdf_stream_decode_level_e::qpdf_dl_specialized)
.value("all", qpdf_stream_decode_level_e::qpdf_dl_all);
- init_pagelist(m);
+ py::enum_<QPDF::encryption_method_e>(m, "EncryptionMethod")
+ .value("none", QPDF::encryption_method_e::e_none)
+ .value("unknown", QPDF::encryption_method_e::e_unknown)
+ .value("rc4", QPDF::encryption_method_e::e_rc4)
+ .value("aes", QPDF::encryption_method_e::e_aes)
+ .value("aesv3", QPDF::encryption_method_e::e_aesv3);
py::class_<QPDF, std::shared_ptr<QPDF>>(m, "Pdf", "In-memory representation of a PDF")
.def_static("new",
@@ -253,20 +456,31 @@ PYBIND11_MODULE(_qpdf, m) {
q->setSuppressWarnings(true);
return q;
},
- "create a new empty PDF from stratch"
+ "Create a new empty PDF from stratch."
)
.def_static("open", open_pdf,
R"~~~(
- Open an existing file at `filename_or_stream`.
+ Open an existing file at *filename_or_stream*.
- If `filename_or_stream` is path-like, the file will be opened. The
- file should not be modified by another process while it is open in
- pikepdf.
+ If *filename_or_stream* is path-like, the file will be opened for reading.
+ The file should not be modified by another process while it is open in
+ pikepdf. The file will not be altered when opened in this way. Any changes
+ to the file must be persisted by using ``.save()``.
- If `filename_or_stream` has `.read()` and `.seek()` methods, the file
+ If *filename_or_stream* has ``.read()`` and ``.seek()`` methods, the file
will be accessed as a readable binary stream. pikepdf will read the
entire stream into a private buffer.
+ ``.open()`` may be used in a ``with``-block, ``.close()`` will be called when
+ the block exists.
+
+ Examples:
+
+ >>> with Pdf.open("test.pdf") as pdf:
+ ...
+
+ >>> pdf = Pdf.open("test.pdf", password="rosebud")
+
Args:
filename_or_stream (os.PathLike): Filename of PDF to open
password (str or bytes): User or owner password to open an
@@ -278,7 +492,8 @@ PYBIND11_MODULE(_qpdf, m) {
ignore_xref_streams (bool): If True, ignore cross-reference
streams. See qpdf documentation.
suppress_warnings (bool): If True (default), warnings are not
- printed to stderr. Use `get_warnings()` to retrieve warnings.
+ printed to stderr. Use :meth:`pikepdf.Pdf.get_warnings()` to
+ retrieve warnings.
attempt_recovery (bool): If True (default), attempt to recover
from PDF parsing errors.
inherit_page_attributes (bool): If True (default), push attributes
@@ -289,7 +504,7 @@ PYBIND11_MODULE(_qpdf, m) {
file.
pikepdf.PdfError: If for other reasons we could not open
the file.
- TypeError: If the type of `filename_or_stream` is not
+ TypeError: If the type of ``filename_or_stream`` is not
usable.
FileNotFoundError: If the file was not found.
)~~~",
@@ -307,15 +522,15 @@ PYBIND11_MODULE(_qpdf, m) {
}
)
.def_property_readonly("filename", &QPDF::getFilename,
- "the source filename of an existing PDF, when available")
+ "The source filename of an existing PDF, when available.")
.def_property_readonly("pdf_version", &QPDF::getPDFVersion,
- "the PDF standard version, such as '1.7'")
+ "The PDF standard version, such as '1.7'.")
.def_property_readonly("extension_level", &QPDF::getExtensionLevel)
.def_property_readonly("Root", &QPDF::getRoot,
- "the /Root object of the PDF"
+ "The /Root object of the PDF."
)
.def_property_readonly("root", &QPDF::getRoot,
- "alias for .Root, the /Root object of the PDF"
+ "Alias for .Root, the /Root object of the PDF."
)
.def_property("docinfo",
[](QPDF& q) {
@@ -330,7 +545,16 @@ PYBIND11_MODULE(_qpdf, m) {
throw py::value_error("docinfo must be an indirect object - use Pdf.make_indirect");
q.getTrailer().replaceKey("/Info", replace);
},
- "access the document information dictionary"
+ R"~~~(
+ Access the (deprecated) document information dictionary.
+
+ The document information dictionary is a brief metadata record
+ that can store some information about the origin of a PDF. It is
+ deprecated and removed in the PDF 2.0 specification. Use the
+ ``.open_metadata()`` API instead, which will edit the modern (and
+ unfortunately, more complicated) XMP metadata object and synchronize
+ changes to the document information dictionary.
+ )~~~"
)
.def_property_readonly("trailer", &QPDF::getTrailer,
R"~~~(
@@ -394,8 +618,9 @@ PYBIND11_MODULE(_qpdf, m) {
The page can be either be a newly constructed PDF object or it can
be obtained from another PDF.
- :param pikepdf.Object page: The page object to attach
- :param bool first: If True, prepend this before the first page; if False append after last page
+ Args:
+ page (pikepdf.Object): The page object to attach
+ first (bool): If True, prepend this before the first page; if False append after last page
)~~~",
py::arg("page"),
py::arg("first")=false,
@@ -423,11 +648,14 @@ PYBIND11_MODULE(_qpdf, m) {
.def("save",
save_pdf,
R"~~~(
- Save all modifications to this :class:`pikepdf.Pdf`
+ Save all modifications to this :class:`pikepdf.Pdf`.
Args:
filename (str or stream): Where to write the output. If a file
- exists in this location it will be overwritten.
+ exists in this location it will be overwritten. The file
+ should not be the same as the input file, because data from
+ the input file may be lazily loaded; as such overwriting
+ in place will null-out objects.
static_id (bool): Indicates that the ``/ID`` metadata, normally
calculated as a hash of certain PDF contents and metadata
@@ -437,12 +665,20 @@ PYBIND11_MODULE(_qpdf, m) {
manner compliant with PDF/A and other stricter variants.
This should be True, the default, in most cases.
- min_version (str): Sets the minimum version of PDF
+ min_version (str or tuple): Sets the minimum version of PDF
specification that should be required. If left alone QPDF
- will decide.
- force_version (str): Override the version recommend by QPDF,
+ will decide. If a tuple, the second element is an integer, the
+ extension level.
+ force_version (str or tuple): Override the version recommend by QPDF,
potentially creating an invalid file that does not display
- in old versions. See QPDF manual for details.
+ in old versions. See QPDF manual for details. If a tuple, the
+ second element is an integer, the extension level.
+ fix_metadata_version (bool): If True (default) and the XMP metadata
+ contains the optional PDF version field, ensure the version in
+ metadata is correct. If the XMP metadata does not contain a PDF
+ version field, none will be added. To ensure that the field is
+ added, edit the metadata and insert a placeholder value in
+ ``pdf:PDFVersion``.
object_stream_mode (pikepdf.ObjectStreamMode):
``disable`` prevents the use of object streams.
@@ -472,10 +708,24 @@ PYBIND11_MODULE(_qpdf, m) {
the program ``fix-qdf`` to fix convert back to a standard
PDF.
+ progress (callable): Specify a callback function that is called
+ as the PDF is written. The function will be called with an
+ integer between 0-100 as the sole parameter, the progress
+ percentage. This function may not access or modify the PDF
+ while it is being written, or data corruption will almost
+ certainly occur.
+
+ encryption (pikepdf.models.Encryption or bool): If ``False``
+ or omitted, existing encryption will be removed. If ``True``
+ encryption settings are copied from the originating PDF.
+ Alternately, an ``Encryption`` object may be provided that
+ sets the parameters for new encryption.
+
You may call ``.save()`` multiple times with different parameters
to generate different versions of a file, and you *may* continue
to modify the file after saving it. ``.save()`` does not modify
- the ``Pdf`` object in memory.
+ the ``Pdf`` object in memory, except possibly by updating the XMP
+ metadata version with ``fix_metadata_version``.
.. note::
@@ -491,13 +741,15 @@ PYBIND11_MODULE(_qpdf, m) {
py::arg("preserve_pdfa")=true,
py::arg("min_version")="",
py::arg("force_version")="",
+ py::arg("fix_metadata_version")=true,
py::arg("compress_streams")=true,
- py::arg("stream_decode_level")=qpdf_stream_decode_level_e::qpdf_dl_generalized,
+ py::arg("stream_decode_level")=py::none(),
py::arg("object_stream_mode")=qpdf_object_stream_e::qpdf_o_preserve,
py::arg("normalize_content")=false,
py::arg("linearize")=false,
py::arg("qdf")=false,
- py::arg("progress")=py::none()
+ py::arg("progress")=py::none(),
+ py::arg("encryption")=py::none()
)
.def("_get_object_id", &QPDF::getObjectByID)
.def("get_object",
@@ -510,7 +762,8 @@ PYBIND11_MODULE(_qpdf, m) {
Returns:
pikepdf.Object
)~~~",
- py::return_value_policy::reference_internal
+ py::return_value_policy::reference_internal,
+ py::arg("objgen")
)
.def("get_object",
[](QPDF &q, int objid, int gen) {
@@ -522,7 +775,9 @@ PYBIND11_MODULE(_qpdf, m) {
Returns:
pikepdf.Object
)~~~",
- py::return_value_policy::reference_internal
+ py::return_value_policy::reference_internal,
+ py::arg("objid"),
+ py::arg("gen")
)
.def("make_indirect", &QPDF::makeIndirectObject,
R"~~~(
@@ -544,7 +799,8 @@ PYBIND11_MODULE(_qpdf, m) {
Returns:
pikepdf.Object
- )~~~"
+ )~~~",
+ py::arg("h")
)
.def("make_indirect",
[](QPDF &q, py::object obj) -> QPDFObjectHandle {
@@ -555,7 +811,8 @@ PYBIND11_MODULE(_qpdf, m) {
Returns:
pikepdf.Object
- )~~~"
+ )~~~",
+ py::arg("obj")
)
.def("copy_foreign",
[](QPDF &q, QPDFObjectHandle &h) -> QPDFObjectHandle {
@@ -563,20 +820,106 @@ PYBIND11_MODULE(_qpdf, m) {
},
"Copy object from foreign PDF to this one.",
py::return_value_policy::reference_internal,
- py::keep_alive<1, 2>()
+ py::keep_alive<1, 2>(),
+ py::arg("h")
)
.def("_replace_object",
[](QPDF &q, int objid, int gen, QPDFObjectHandle &h) {
q.replaceObject(objid, gen, h);
}
)
- ; // class Pdf
-
- init_object(m);
+ .def("_swap_objects",
+ [](QPDF &q, std::pair<int, int> objgen1, std::pair<int, int> objgen2) {
+ QPDFObjGen o1(objgen1.first, objgen1.second);
+ QPDFObjGen o2(objgen2.first, objgen2.second);
+ q.swapObjects(o1, o2);
+ }
+ )
+ .def("_process",
+ [](QPDF &q, std::string description, py::bytes data) {
+ std::string s = data;
+ q.processMemoryFile(
+ description.c_str(),
+ s.data(),
+ s.size()
+ );
+ },
+ R"~~~(
+ Process a new in-memory PDF, replacing the existing PDF
-#ifdef VERSION_INFO
- m.attr("__version__") = VERSION_INFO;
-#else
- m.attr("__version__") = "dev";
-#endif
+ Used to implement Pdf.close().
+ )~~~"
+ )
+ .def_property_readonly("_allow_accessibility",
+ [](QPDF &q) {
+ return q.allowAccessibility();
+ }
+ )
+ .def_property_readonly("_allow_extract",
+ [](QPDF &q) {
+ return q.allowExtractAll();
+ }
+ )
+ .def_property_readonly("_allow_print_lowres",
+ [](QPDF &q) {
+ return q.allowPrintLowRes();
+ }
+ )
+ .def_property_readonly("_allow_print_highres",
+ [](QPDF &q) {
+ return q.allowPrintHighRes();
+ }
+ )
+ .def_property_readonly("_allow_modify_assembly",
+ [](QPDF &q) {
+ return q.allowModifyAssembly();
+ }
+ )
+ .def_property_readonly("_allow_modify_form",
+ [](QPDF &q) {
+ return q.allowModifyForm();
+ }
+ )
+ .def_property_readonly("_allow_modify_annotation",
+ [](QPDF &q) {
+ return q.allowModifyAnnotation();
+ }
+ )
+ .def_property_readonly("_allow_modify_other",
+ [](QPDF &q) {
+ return q.allowModifyOther();
+ }
+ )
+ .def_property_readonly("_allow_modify_all",
+ [](QPDF &q) {
+ return q.allowModifyAll();
+ }
+ )
+ .def_property_readonly("_encryption_data",
+ [](QPDF &q) {
+ int R = 0;
+ int P = 0;
+ int V = 0;
+ QPDF::encryption_method_e stream_method = QPDF::e_unknown;
+ QPDF::encryption_method_e string_method = QPDF::e_unknown;
+ QPDF::encryption_method_e file_method = QPDF::e_unknown;
+ if (!q.isEncrypted(R, P, V, stream_method, string_method, file_method))
+ return py::dict();
+
+ auto user_passwd = q.getTrimmedUserPassword();
+ auto encryption_key = q.getEncryptionKey();
+
+ return py::dict(
+ py::arg("R") = R,
+ py::arg("P") = P,
+ py::arg("V") = V,
+ py::arg("stream") = stream_method,
+ py::arg("string") = string_method,
+ py::arg("file") = file_method,
+ py::arg("user_passwd") = py::bytes(user_passwd),
+ py::arg("encryption_key") = py::bytes(encryption_key)
+ );
+ }
+ )
+ ; // class Pdf
}
diff --git a/src/qpdf/qpdf_inputsource.h b/src/qpdf/qpdf_inputsource.h
index dc26267..b29b309 100644
--- a/src/qpdf/qpdf_inputsource.h
+++ b/src/qpdf/qpdf_inputsource.h
@@ -17,7 +17,7 @@
#include <qpdf/Buffer.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/InputSource.hh>
-
+#include <qpdf/QUtil.hh>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
diff --git a/src/qpdf/qpdf_pagelist.cpp b/src/qpdf/qpdf_pagelist.cpp
index d8222dd..07c496d 100644
--- a/src/qpdf/qpdf_pagelist.cpp
+++ b/src/qpdf/qpdf_pagelist.cpp
@@ -121,18 +121,6 @@ void PageList::set_pages_from_iterable(py::slice slice, py::iterable other)
void PageList::delete_page(size_t index)
{
auto page = this->get_page(index);
- /*
- // Need a dec_ref to match the inc_ref in insert_page, but it's unclear
- // how to do that. The item will be set the current QPDF always.
- // Accessing data from another PDF seems to involve some pipeline
- // magic in QPDF around libqpdf/QPDFWriter.cc:1614
- if (original page owner != &this->getQPDF()) {
- // If we are removing a page not originally owned by our QPDF,
- // remove the reference count we put it in insert_page()
- py::object pyqpdf = py::cast(page_owner);
- pyqpdf.dec_ref();
- }
- */
this->qpdf->removePage(page);
}
@@ -175,24 +163,6 @@ void PageList::insert_page(size_t index, QPDFObjectHandle page)
// qpdf does not accept duplicating pages within the same file,
// so manually create a copy
page = this->qpdf->makeIndirectObject(page);
- } else {
- // libqpdf does not transfer a page's contents to the new QPDF.
- // Instead WHEN ASKED TO WRITE it will go back and get the data
- // from objecthandle->getOwningQPDF(). Therefore we must ensure
- // our previous owner is kept alive.
-#if 1
- auto tinfo = py::detail::get_type_info(typeid(QPDF));
- py::handle pyqpdf = py::detail::get_object_handle(page_owner, tinfo);
- py::handle pypage = py::cast(page);
- py::detail::keep_alive_impl(pypage, pyqpdf);
-#else
- // MSVC++ complains about the symbol
- // QPDF::Members::~Members() not being exported when this version
- // is used, but it works for GCC and Clang.
- py::handle pyqpdf = py::cast(page_owner);
- py::handle pypage = py::cast(page);
- py::detail::keep_alive_impl(pypage, pyqpdf);
-#endif
}
if (index != this->count()) {
QPDFObjectHandle refpage = this->get_page(index);
@@ -202,7 +172,6 @@ void PageList::insert_page(size_t index, QPDFObjectHandle page)
}
}
-
void init_pagelist(py::module &m)
{
py::class_<PageList>(m, "PageList")
@@ -229,12 +198,13 @@ void init_pagelist(py::module &m)
.def("__delitem__", &PageList::delete_pages_from_iterable)
.def("__len__", &PageList::count)
.def("p",
- [](PageList &pl, size_t index) {
- if (index == 0) // Indexing past end is checked in .get_page
+ [](PageList &pl, size_t pnum) {
+ if (pnum == 0) // Indexing past end is checked in .get_page
throw py::index_error("page access out of range in 1-based indexing");
- return pl.get_page(index - 1);
+ return pl.get_page(pnum - 1);
},
- "convenience - look up page number in ordinal numbering, .p(1) is first page"
+ "Convenience - look up page number in ordinal numbering, ``.p(1)`` is first page",
+ py::arg("pnum")
)
.def("__iter__",
[](PageList &pl) {
@@ -252,7 +222,16 @@ void init_pagelist(py::module &m)
[](PageList &pl, ssize_t index, py::object obj) {
size_t uindex = uindex_from_index(pl, index);
pl.insert_page(uindex, obj);
- }, py::keep_alive<1, 3>()
+ }, py::keep_alive<1, 3>(),
+ R"~~~(
+ Insert a page at the specified location.
+
+ Args:
+ index (int): location at which to insert page, 0-based indexing
+ obj (pikepdf.Object): page object to insert
+ )~~~",
+ py::arg("index"),
+ py::arg("obj")
)
.def("reverse",
[](PageList &pl) {
@@ -262,13 +241,16 @@ void init_pagelist(py::module &m)
PySlice_New(Py_None, Py_None, step.ptr()));
py::list reversed_pages = pl.get_pages(reversed);
pl.set_pages_from_iterable(ordinary_indices, reversed_pages);
- }
+ },
+ "Reverse the order of pages."
)
.def("append",
[](PageList &pl, py::object page) {
pl.insert_page(pl.count(), page);
},
- py::keep_alive<1, 2>()
+ py::keep_alive<1, 2>(),
+ "Add another page to the end.",
+ py::arg("page")
)
.def("extend",
[](PageList &pl, PageList &other) {
@@ -279,7 +261,9 @@ void init_pagelist(py::module &m)
pl.insert_page(pl.count(), other.get_page(i));
}
},
- py::keep_alive<1, 2>()
+ py::keep_alive<1, 2>(),
+ "Extend the ``Pdf`` by adding pages from another ``Pdf.pages``.",
+ py::arg("other")
)
.def("extend",
[](PageList &pl, py::iterable iterable) {
@@ -290,6 +274,29 @@ void init_pagelist(py::module &m)
++it;
}
},
- py::keep_alive<1, 2>()
+ py::keep_alive<1, 2>(),
+ "Extend the ``Pdf`` by adding pages from an iterable of pages.",
+ py::arg("iterable")
+ )
+ .def("remove",
+ [](PageList &pl, py::kwargs kwargs) {
+ auto pnum = kwargs["p"].cast<size_t>();
+ if (pnum == 0) // Indexing past end is checked in .get_page
+ throw py::index_error("page access out of range in 1-based indexing");
+ pl.delete_page(pnum - 1);
+ },
+ R"~~~(
+ Remove a page (using 1-based numbering)
+
+ Args:
+ p (int): 1-based page number
+ )~~~"
+ )
+ .def("__repr__",
+ [](PageList &pl) {
+ return std::string("<pikepdf._qpdf.PageList len=")
+ + std::to_string(pl.count())
+ + std::string(">");
+ }
);
}
diff --git a/src/qpdf/qpdf_pipeline.h b/src/qpdf/qpdf_pipeline.h
new file mode 100644
index 0000000..f922827
--- /dev/null
+++ b/src/qpdf/qpdf_pipeline.h
@@ -0,0 +1,96 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2017, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+#pragma once
+
+#include <cstdio>
+#include <cstring>
+
+#include <qpdf/Constants.h>
+#include <qpdf/Types.h>
+#include <qpdf/DLL.h>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/Pipeline.hh>
+#include <qpdf/QUtil.hh>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "pikepdf.h"
+
+
+// Terminal qpdf Pipeline that forwards the bytes it receives to a Python
+// file-like object. write() passes each chunk to stream.write() as a
+// memoryview and finish() calls stream.flush() when the stream has one.
+// Both methods take the GIL before calling into Python.
+class Pl_PythonOutput : public Pipeline
+{
+public:
+    // identifier: label handed to the Pipeline base class; also used when
+    //     reporting a failed write.
+    // stream: Python object that provides write() and, optionally, flush().
+    Pl_PythonOutput(const char *identifier, py::object stream) :
+        Pipeline(identifier, nullptr),
+        stream(stream)
+    {
+    }
+
+    virtual ~Pl_PythonOutput() = default;
+    Pl_PythonOutput(const Pl_PythonOutput&) = delete;
+    Pl_PythonOutput& operator= (const Pl_PythonOutput&) = delete;
+    Pl_PythonOutput(Pl_PythonOutput&&) = delete;
+    Pl_PythonOutput& operator= (Pl_PythonOutput&&) = delete;
+
+    // Send all len bytes starting at buf to stream.write(), calling it again
+    // with the remainder whenever it reports fewer bytes than were offered.
+    // Throws py::type_error if the return value of write() cannot be
+    // converted to size_t, py::value_error if it reports more bytes than
+    // were offered, and calls QUtil::throw_system_error() if it reports zero.
+    void write(unsigned char *buf, size_t len) override
+    {
+        py::gil_scoped_acquire gil;
+        while (len > 0) {
+            py::buffer_info buffer(buf, len);
+            py::memoryview view_buffer(buffer);
+            py::object result = this->stream.attr("write")(view_buffer);
+            size_t written = 0;
+            try {
+                written = result.cast<size_t>();
+            } catch (const py::cast_error &) {
+                throw py::type_error("Unexpected return type of write()");
+            }
+            if (written == 0) {
+                QUtil::throw_system_error(this->identifier);
+            } else if (written > len) {
+                // A larger count would wrap the unsigned len below and make
+                // the loop read past the end of buf.
+                throw py::value_error("write() returned more bytes than were provided");
+            } else {
+                buf += written;
+                len -= written;
+            }
+        }
+    }
+
+    // Flush the stream. A stream without a flush attribute is accepted;
+    // exceptions raised by flush() itself propagate to the caller.
+    void finish() override
+    {
+        py::gil_scoped_acquire gil;
+        // A failed attribute lookup surfaces from pybind11 as
+        // py::error_already_set, so probe for flush() instead of catching.
+        if (py::hasattr(this->stream, "flush"))
+            this->stream.attr("flush")();
+    }
+
+private:
+    py::object stream;
+};