summaryrefslogtreecommitdiff
path: root/src/qpdf
diff options
context:
space:
mode:
author: James R. Barlow <james@purplerock.ca>, 2022-02-05 00:02:30 -0800
committer: James R. Barlow <james@purplerock.ca>, 2022-02-05 00:02:30 -0800
commit: 8a47d2b0c2c8539245d5a81e324a94c51418c763 (patch)
tree: ad82c3b68446f3189ea13278885c9677f899a05e /src/qpdf
parent: 280dd44a44ce1f725eddbffad937d0c485efa0f8 (diff)
Register JBIG2Decode using QPDF StreamFilter
More flexible than previous approach.
Diffstat (limited to 'src/qpdf')
-rw-r--r-- src/qpdf/jbig2-inl.h 120
-rw-r--r-- src/qpdf/qpdf.cpp 3
2 files changed, 123 insertions, 0 deletions
diff --git a/src/qpdf/jbig2-inl.h b/src/qpdf/jbig2-inl.h
new file mode 100644
index 0000000..1904f46
--- /dev/null
+++ b/src/qpdf/jbig2-inl.h
@@ -0,0 +1,120 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2022, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+#include "pikepdf.h"
+
+#include <cstdio>
+#include <cstring>
+
+#include <qpdf/Constants.h>
+#include <qpdf/Types.h>
+#include <qpdf/DLL.h>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pipeline.hh>
+
+/**
+ * Adapt a read-only char buffer to the unsigned char* that Pipeline::write()
+ * takes.
+ *
+ * QPDF indicates Pipeline::write(unsigned char*) is effectively const but is
+ * not actually declared const for historical reasons, so discarding the const
+ * here is acceptable. Viewing a char buffer through unsigned char* is safe.
+ *
+ * Declared inline because the definition lives in a header; a non-inline
+ * definition would be emitted once per including translation unit.
+ *
+ * @param s  Bytes to hand to a pipeline; this function does not modify them.
+ * @return   The same address as s, typed as unsigned char*.
+ */
+inline unsigned char *pipeline_caster(const char *s)
+{
+    return const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(s));
+}
+
+/**
+ * Pipeline stage that decodes JBIG2 data by delegating to Python.
+ *
+ * write() only buffers its input. The buffered stream is decoded as a whole
+ * in finish() by calling pikepdf.jbig2.extract_jbig2_bytes(data, jbig2globals)
+ * and the result is passed to the next pipeline in a single write() call.
+ *
+ * NOTE(review): the constructor and finish() create and call Python objects,
+ * which presumably requires the calling thread to hold the GIL -- confirm
+ * against the call sites.
+ */
+class Pl_JBIG2 : public Pipeline {
+public:
+    /**
+     * @param identifier    Name of this stage, forwarded to Pipeline.
+     * @param next          Downstream pipeline that receives the decoded bytes.
+     * @param jbig2globals  Bytes passed to the decoder as its second argument;
+     *                      defaults to empty.
+     */
+    Pl_JBIG2(
+        const char *identifier, Pipeline *next, const std::string &jbig2globals = "")
+        : Pipeline(identifier, next), jbig2globals(jbig2globals)
+    {
+    }
+    virtual ~Pl_JBIG2() = default;
+
+    /** Append a chunk of encoded input to the internal buffer. */
+    virtual void write(unsigned char *data, size_t len) override
+    {
+        this->ss.write(reinterpret_cast<const char *>(data), len);
+    }
+
+    /**
+     * Decode everything buffered so far, forward the result and finish the
+     * next stage.
+     *
+     * When nothing was buffered the Python decoder is not called; only the
+     * next stage, if there is one, is finished.
+     */
+    virtual void finish() override
+    {
+        std::string data = this->ss.str();
+
+        // Empty the buffer right away so this object starts clean on its next
+        // use, even if decoding below throws. str() replaces the contents;
+        // clear() by itself only resets the stream's state flags and would
+        // leave the old bytes in place.
+        this->ss.str(std::string());
+        this->ss.clear();
+
+        if (data.empty()) {
+            if (this->getNext(true))
+                this->getNext()->finish();
+            return;
+        }
+
+        py::bytes pydata = py::bytes(data);
+        py::function extract_jbig2 =
+            py::module_::import("pikepdf.jbig2").attr("extract_jbig2_bytes");
+
+        py::bytes extracted = extract_jbig2(pydata, this->jbig2globals);
+
+        std::string extracted_cpp = std::string(extracted);
+
+        this->getNext()->write(
+            pipeline_caster(extracted_cpp.data()), extracted_cpp.length());
+
+        if (this->getNext(true)) {
+            this->getNext()->finish();
+        }
+    }
+
+private:
+    py::bytes jbig2globals; // second argument for extract_jbig2_bytes
+    std::stringstream ss;   // encoded input accumulated by write()
+};
+
+/**
+ * QPDFStreamFilter implementation that decodes through Pl_JBIG2.
+ *
+ * setDecodeParms() captures the optional /JBIG2Globals stream data, and
+ * getDecodePipeline() builds a Pl_JBIG2 that is given those bytes.
+ */
+class JBIG2StreamFilter : public QPDFStreamFilter {
+public:
+    JBIG2StreamFilter() = default;
+    virtual ~JBIG2StreamFilter() = default;
+
+    /**
+     * Decide whether this filter can decode the stream and remember its
+     * globals.
+     *
+     * @param decode_parms  Decode parameters for the stream; may be a null
+     *                      object.
+     * @return false when pikepdf.jbig2.jbig2dec_available() reports false or
+     *         when any std::exception is raised along the way; true otherwise.
+     */
+    virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override
+    {
+        try {
+            auto jbig2dec_available =
+                py::module_::import("pikepdf.jbig2").attr("jbig2dec_available");
+            // Convert explicitly: testing a py::object directly only checks
+            // that the handle is non-null, not whether the Python value is
+            // truthy, so a returned False would otherwise pass this gate.
+            if (!jbig2dec_available().cast<bool>())
+                return false;
+
+            // No decode parameters means there are no globals to read.
+            // Return before calling getKey() on something that is not a
+            // dictionary.
+            if (decode_parms.isNull())
+                return true;
+
+            auto jbig2globals_obj = decode_parms.getKey("/JBIG2Globals");
+            if (jbig2globals_obj.isNull())
+                return true;
+
+            auto buf = jbig2globals_obj.getStreamData();
+            this->jbig2globals =
+                std::string(reinterpret_cast<char *>(buf->getBuffer()), buf->getSize());
+            return true;
+        } catch (const std::exception &) {
+            // Best effort: any failure is reported as "cannot decode" through
+            // the return value below rather than propagated.
+        }
+        return false;
+    }
+
+    /**
+     * Create the decoding stage that writes into next.
+     *
+     * The pipeline is stored in this object, which keeps it alive; the
+     * returned pointer is non-owning.
+     */
+    virtual Pipeline *getDecodePipeline(Pipeline *next) override
+    {
+        this->pipeline =
+            std::make_shared<Pl_JBIG2>("JBIG2 decode", next, this->jbig2globals);
+        return this->pipeline.get();
+    }
+
+    /** Create a new filter instance. */
+    static std::shared_ptr<JBIG2StreamFilter> factory()
+    {
+        return std::make_shared<JBIG2StreamFilter>();
+    }
+
+    virtual bool isSpecializedCompression() override { return true; }
+    virtual bool isLossyCompression() override { return false; }
+
+private:
+    std::string jbig2globals;           // bytes of /JBIG2Globals, empty if absent
+    std::shared_ptr<Pipeline> pipeline; // pipeline made by getDecodePipeline()
+}; \ No newline at end of file
diff --git a/src/qpdf/qpdf.cpp b/src/qpdf/qpdf.cpp
index bf54bdd..f54aa33 100644
--- a/src/qpdf/qpdf.cpp
+++ b/src/qpdf/qpdf.cpp
@@ -32,6 +32,7 @@
#include "qpdf_pagelist.h"
#include "qpdf_inputsource-inl.h"
#include "mmap_inputsource-inl.h"
+#include "jbig2-inl.h"
#include "pipeline.h"
#include "utils.h"
#include "gsl.h"
@@ -473,6 +474,8 @@ void save_pdf(QPDF &q,
void init_qpdf(py::module_ &m)
{
+ QPDF::registerStreamFilter("/JBIG2Decode", &JBIG2StreamFilter::factory);
+
py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
.value("disable", qpdf_object_stream_e::qpdf_o_disable)
.value("preserve", qpdf_object_stream_e::qpdf_o_preserve)