summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author James R. Barlow <james@purplerock.ca> 2022-02-05 00:02:30 -0800
committer James R. Barlow <james@purplerock.ca> 2022-02-05 00:02:30 -0800
commit 8a47d2b0c2c8539245d5a81e324a94c51418c763 (patch)
tree ad82c3b68446f3189ea13278885c9677f899a05e /src
parent 280dd44a44ce1f725eddbffad937d0c485efa0f8 (diff)
Register JBIG2Decode using QPDF StreamFilter
More flexible than previous approach.
Diffstat (limited to 'src')
-rw-r--r-- src/pikepdf/jbig2.py 38
-rw-r--r-- src/qpdf/jbig2-inl.h 120
-rw-r--r-- src/qpdf/qpdf.cpp 3
3 files changed, 160 insertions, 1 deletions
diff --git a/src/pikepdf/jbig2.py b/src/pikepdf/jbig2.py
index 026b2ab..87f8550 100644
--- a/src/pikepdf/jbig2.py
+++ b/src/pikepdf/jbig2.py
@@ -25,7 +25,14 @@ def extract_jbig2(
global_path = Path(tmpdir) / "global"
output_path = Path(tmpdir) / "outfile"
- args = ["jbig2dec", "-e", "-o", os.fspath(output_path)]
+ args = [
+ "jbig2dec",
+ "--embedded",
+ "--format",
+ "png",
+ "--output",
+ os.fspath(output_path),
+ ]
# Get the raw stream, because we can't decode im_obj - that is why we are here
# (Strictly speaking we should remove any non-JBIG2 filters if double encoded)
@@ -45,6 +52,35 @@ def extract_jbig2(
return im
+def extract_jbig2_bytes(jbig2: bytes, jbig2_globals: bytes) -> bytes:
+ with TemporaryDirectory(prefix='pikepdf', suffix='.jbig2') as tmpdir:
+ image_path = Path(tmpdir) / "image"
+ global_path = Path(tmpdir) / "global"
+ output_path = Path(tmpdir) / "outfile"
+
+ args = [
+ "jbig2dec",
+ "--embedded",
+ "--format",
+ "png",
+ "--output",
+ os.fspath(output_path),
+ ]
+
+ # Get the raw stream, because we can't decode im_obj - that is why we are here
+ # (Strictly speaking we should remove any non-JBIG2 filters if double encoded)
+ image_path.write_bytes(jbig2)
+
+ if len(jbig2_globals) > 0:
+ global_path.write_bytes(jbig2_globals)
+ args.append(os.fspath(global_path))
+
+ args.append(os.fspath(image_path))
+
+ run(args, stdout=DEVNULL, check=True)
+ return output_path.read_bytes()
+
+
def jbig2dec_available() -> bool:
try:
proc = run(['jbig2dec', '--version'], stdout=PIPE, check=True, encoding='ascii')
diff --git a/src/qpdf/jbig2-inl.h b/src/qpdf/jbig2-inl.h
new file mode 100644
index 0000000..1904f46
--- /dev/null
+++ b/src/qpdf/jbig2-inl.h
@@ -0,0 +1,120 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2022, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+#include "pikepdf.h"
+
+#include <cstdio>
+#include <cstring>
+#include <exception>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include <qpdf/Constants.h>
+#include <qpdf/Types.h>
+#include <qpdf/DLL.h>
+#include <qpdf/QPDFExc.hh>
+#include <qpdf/PointerHolder.hh>
+#include <qpdf/Buffer.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFStreamFilter.hh>
+#include <qpdf/QUtil.hh>
+#include <qpdf/Pipeline.hh>
+
+/// Reinterpret a read-only char buffer as the `unsigned char *` that
+/// Pipeline::write() takes.
+///
+/// QPDF indicates Pipeline::write(unsigned char *) is effectively const but
+/// not declared const for historical reasons, so the const is discarded here.
+/// Viewing char storage through unsigned char * is permitted by the language.
+///
+/// Declared inline because it is defined in a header: without it, including
+/// this header from a second translation unit is a multiple-definition error.
+///
+/// @param s  Pointer to the bytes to expose (not modified by this function).
+/// @return   The same address, typed as `unsigned char *`.
+inline unsigned char *pipeline_caster(const char *s)
+{
+    return const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(s));
+}
+
+/// Pipeline stage that decodes JBIG2 data by delegating to Python.
+///
+/// Everything passed to write() is buffered. On finish() the buffered data and
+/// the JBIG2 globals are handed to pikepdf.jbig2.extract_jbig2_bytes, and the
+/// bytes it returns are written to the next pipeline.
+class Pl_JBIG2 : public Pipeline {
+public:
+    /// @param identifier    Pipeline name, forwarded to the Pipeline base.
+    /// @param next          Downstream pipeline that receives the decoded bytes.
+    /// @param jbig2globals  Raw bytes of the JBIG2 globals; empty if there are none.
+    Pl_JBIG2(
+        const char *identifier, Pipeline *next, const std::string &jbig2globals = "")
+        : Pipeline(identifier, next), jbig2globals(jbig2globals)
+    {
+    }
+    virtual ~Pl_JBIG2() = default;
+
+    /// Buffer a chunk of encoded data; nothing is decoded until finish().
+    virtual void write(unsigned char *data, size_t len) override
+    {
+        this->ss.write(reinterpret_cast<const char *>(data), len);
+    }
+
+    /// Decode everything buffered so far, pass it downstream, and finish the
+    /// next pipeline. With no buffered data, only the next pipeline is finished.
+    virtual void finish() override
+    {
+        std::string data = this->ss.str();
+
+        // Discard the buffered input up front so it is dropped even if decoding
+        // throws. clear() alone only resets the stream's error flags; str("")
+        // is what actually empties the buffer.
+        this->ss.str(std::string());
+        this->ss.clear();
+
+        if (data.empty()) {
+            if (this->getNext(true))
+                this->getNext()->finish();
+            return;
+        }
+
+        std::string decoded = this->decode(data);
+
+        this->getNext()->write(pipeline_caster(decoded.data()), decoded.length());
+
+        if (this->getNext(true)) {
+            this->getNext()->finish();
+        }
+    }
+
+private:
+    /// Call pikepdf.jbig2.extract_jbig2_bytes(data, globals) and return its result.
+    ///
+    /// The GIL is acquired for the whole call so that every Python object
+    /// created here is also destroyed while the GIL is held.
+    std::string decode(const std::string &data)
+    {
+        py::gil_scoped_acquire gil;
+
+        py::function extract_jbig2 =
+            py::module_::import("pikepdf.jbig2").attr("extract_jbig2_bytes");
+        py::bytes extracted =
+            extract_jbig2(py::bytes(data), py::bytes(this->jbig2globals));
+        return std::string(extracted);
+    }
+
+    // Kept as a plain std::string (not py::bytes) so that constructing and
+    // destroying this pipeline never touches a Python object without the GIL.
+    std::string jbig2globals;
+    std::stringstream ss;
+};
+
+/// QPDF stream filter for /JBIG2Decode that decodes through a Pl_JBIG2 pipeline.
+class JBIG2StreamFilter : public QPDFStreamFilter {
+public:
+    JBIG2StreamFilter() = default;
+    virtual ~JBIG2StreamFilter() = default;
+
+    /// Decide whether this filter can decode the stream, capturing the JBIG2
+    /// globals when present.
+    ///
+    /// @param decode_parms  The stream's decode parameters. Per the
+    ///                      QPDFStreamFilter documentation this may be a null
+    ///                      object when the stream has none.
+    /// @return true when jbig2dec is reported available and any /JBIG2Globals
+    ///         stream could be read; false otherwise, including on any exception.
+    virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override
+    {
+        try {
+            if (!jbig2dec_is_available())
+                return false;
+
+            // A stream with no decode parameters has no globals. Checking
+            // for null first avoids calling getKey() on a non-dictionary.
+            if (decode_parms.isNull())
+                return true;
+
+            auto jbig2globals_obj = decode_parms.getKey("/JBIG2Globals");
+            if (jbig2globals_obj.isNull())
+                return true;
+
+            auto buf = jbig2globals_obj.getStreamData();
+            this->jbig2globals =
+                std::string(reinterpret_cast<char *>(buf->getBuffer()), buf->getSize());
+            return true;
+        } catch (const std::exception &) {
+            // Deliberate best effort: any failure (Python import error,
+            // unreadable globals stream, ...) means "this filter cannot be used".
+        }
+        return false;
+    }
+
+    /// Build the decoding pipeline that feeds `next`. The pipeline is owned by
+    /// this filter; the returned pointer is non-owning.
+    virtual Pipeline *getDecodePipeline(Pipeline *next) override
+    {
+        this->pipeline =
+            std::make_shared<Pl_JBIG2>("JBIG2 decode", next, this->jbig2globals);
+        return this->pipeline.get();
+    }
+
+    /// Factory suitable for QPDF::registerStreamFilter.
+    static std::shared_ptr<JBIG2StreamFilter> factory()
+    {
+        return std::make_shared<JBIG2StreamFilter>();
+    }
+
+    virtual bool isSpecializedCompression() override { return true; }
+    virtual bool isLossyCompression() override { return false; }
+
+private:
+    /// Ask pikepdf.jbig2.jbig2dec_available() whether the decoder can be run.
+    ///
+    /// The result is converted with cast<bool>(): applying `!` directly to the
+    /// returned py::object only tests that the handle is non-null, which is
+    /// true for both Python True and False. The GIL is held for the call.
+    static bool jbig2dec_is_available()
+    {
+        py::gil_scoped_acquire gil;
+
+        auto jbig2dec_available =
+            py::module_::import("pikepdf.jbig2").attr("jbig2dec_available");
+        return jbig2dec_available().cast<bool>();
+    }
+
+    std::string jbig2globals;
+    std::shared_ptr<Pipeline> pipeline;
+};
diff --git a/src/qpdf/qpdf.cpp b/src/qpdf/qpdf.cpp
index bf54bdd..f54aa33 100644
--- a/src/qpdf/qpdf.cpp
+++ b/src/qpdf/qpdf.cpp
@@ -32,6 +32,7 @@
#include "qpdf_pagelist.h"
#include "qpdf_inputsource-inl.h"
#include "mmap_inputsource-inl.h"
+#include "jbig2-inl.h"
#include "pipeline.h"
#include "utils.h"
#include "gsl.h"
@@ -473,6 +474,8 @@ void save_pdf(QPDF &q,
void init_qpdf(py::module_ &m)
{
+ QPDF::registerStreamFilter("/JBIG2Decode", &JBIG2StreamFilter::factory);
+
py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
.value("disable", qpdf_object_stream_e::qpdf_o_disable)
.value("preserve", qpdf_object_stream_e::qpdf_o_preserve)