From 8a47d2b0c2c8539245d5a81e324a94c51418c763 Mon Sep 17 00:00:00 2001
From: "James R. Barlow"
Date: Sat, 5 Feb 2022 00:02:30 -0800
Subject: Register JBIG2Decode using QPDF StreamFilter

More flexible than previous approach.
---
Review note: the new file below was reformatted and revised after this commit
was exported, so the blob id on its index line no longer matches the content.

 src/qpdf/jbig2-inl.h | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/qpdf/qpdf.cpp    |   3 ++
 2 files changed, 167 insertions(+)
 create mode 100644 src/qpdf/jbig2-inl.h

(limited to 'src/qpdf')

diff --git a/src/qpdf/jbig2-inl.h b/src/qpdf/jbig2-inl.h
new file mode 100644
index 0000000..1904f46
--- /dev/null
+++ b/src/qpdf/jbig2-inl.h
@@ -0,0 +1,164 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright (C) 2022, James R. Barlow (https://github.com/jbarlow83/)
+ */
+
+#pragma once
+
+#include "pikepdf.h"
+
+// NOTE(review): the include targets below were reconstructed from what this
+// header uses directly -- confirm against the repository.
+#include <exception>
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include <qpdf/Buffer.hh>
+#include <qpdf/Pipeline.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFObjectHandle.hh>
+#include <qpdf/QPDFStreamFilter.hh>
+
+// Adapt a char buffer to the unsigned char * that Pipeline::write() takes.
+//
+// QPDF indicates Pipeline::write(unsigned char*) is effectively const
+// but not actually const for historical reasons, so we can discard the const.
+// unsigned char* to char* should be safe.
+//
+// Declared inline because this definition lives in a header.
+inline unsigned char *pipeline_caster(const char *s)
+{
+    return const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(s));
+}
+
+// Pipeline stage that buffers everything written to it and, on finish(),
+// passes the buffered bytes to pikepdf.jbig2.extract_jbig2_bytes and writes
+// the result to the next pipeline.
+class Pl_JBIG2 : public Pipeline {
+public:
+    // identifier and next are forwarded to Pipeline. jbig2globals is passed,
+    // as bytes, as the second argument of extract_jbig2_bytes.
+    Pl_JBIG2(
+        const char *identifier, Pipeline *next, const std::string &jbig2globals = "")
+        : Pipeline(identifier, next), jbig2globals(jbig2globals)
+    {
+    }
+    virtual ~Pl_JBIG2() = default;
+
+    // Buffer the input; nothing is decoded until finish().
+    virtual void write(unsigned char *data, size_t len) override
+    {
+        this->ss.write(reinterpret_cast<const char *>(data), len);
+    }
+
+    // Decode the buffered data, forward it, and finish the next pipeline.
+    // Exceptions raised by the Python decoder propagate to the caller.
+    virtual void finish() override
+    {
+        std::string data = this->ss.str();
+        // Empty the buffer so a reused pipeline never decodes stale input;
+        // clear() on its own only resets the stream state flags.
+        this->ss.str(std::string());
+        this->ss.clear();
+
+        if (data.empty()) {
+            if (this->getNext(true))
+                this->getNext()->finish();
+            return;
+        }
+
+        std::string extracted_cpp;
+        {
+            // Hold the GIL while touching Python objects; acquiring it is
+            // harmless when this thread already holds it.
+            py::gil_scoped_acquire gil;
+            py::function extract_jbig2 =
+                py::module_::import("pikepdf.jbig2").attr("extract_jbig2_bytes");
+            py::bytes extracted =
+                extract_jbig2(py::bytes(data), py::bytes(this->jbig2globals));
+            extracted_cpp = std::string(extracted);
+        }
+
+        this->getNext()->write(
+            pipeline_caster(extracted_cpp.data()), extracted_cpp.length());
+
+        if (this->getNext(true)) {
+            this->getNext()->finish();
+        }
+    }
+
+private:
+    // Kept as a std::string so that constructing and destroying this object
+    // never touches a Python reference count.
+    std::string jbig2globals;
+    std::stringstream ss;
+};
+
+// Stream filter registered for /JBIG2Decode; it reads the optional
+// /JBIG2Globals stream and creates a Pl_JBIG2 decode pipeline.
+class JBIG2StreamFilter : public QPDFStreamFilter {
+public:
+    JBIG2StreamFilter() = default;
+    virtual ~JBIG2StreamFilter() = default;
+
+    // Returns true when the stream can be handed to this filter. Any
+    // exception is treated as "cannot decode" and yields false.
+    virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override
+    {
+        try {
+            {
+                // Hold the GIL while touching Python objects.
+                py::gil_scoped_acquire gil;
+                auto jbig2dec_available =
+                    py::module_::import("pikepdf.jbig2").attr("jbig2dec_available");
+                // Cast to bool explicitly: `!obj` on a py::object only tests
+                // whether the handle is non-null, not the Python truth value.
+                if (!jbig2dec_available().cast<bool>())
+                    return false;
+            }
+
+            // The filter may have no decode parameters at all, in which case
+            // there are no globals to read.
+            if (decode_parms.isNull())
+                return true;
+
+            auto jbig2globals_obj = decode_parms.getKey("/JBIG2Globals");
+            if (jbig2globals_obj.isNull())
+                return true;
+
+            auto buf = jbig2globals_obj.getStreamData();
+            this->jbig2globals = std::string(
+                reinterpret_cast<const char *>(buf->getBuffer()), buf->getSize());
+            return true;
+        } catch (const std::exception &) {
+            // Deliberately best-effort: fall through and report failure.
+        }
+        return false;
+    }
+
+    // Creates the decode pipeline; this filter keeps ownership of it and
+    // returns a non-owning pointer.
+    virtual Pipeline *getDecodePipeline(Pipeline *next) override
+    {
+        this->pipeline =
+            std::make_shared<Pl_JBIG2>("JBIG2 decode", next, this->jbig2globals);
+        return this->pipeline.get();
+    }
+
+    // Factory passed to QPDF::registerStreamFilter.
+    static std::shared_ptr<JBIG2StreamFilter> factory()
+    {
+        return std::make_shared<JBIG2StreamFilter>();
+    }
+
+    virtual bool isSpecializedCompression() override { return true; }
+    virtual bool isLossyCompression() override { return false; }
+
+private:
+    std::string jbig2globals;
+    std::shared_ptr<Pl_JBIG2> pipeline;
+};
diff --git a/src/qpdf/qpdf.cpp b/src/qpdf/qpdf.cpp
index bf54bdd..f54aa33 100644
--- a/src/qpdf/qpdf.cpp
+++ b/src/qpdf/qpdf.cpp
@@ -32,6 +32,7 @@
 #include "qpdf_pagelist.h"
 #include "qpdf_inputsource-inl.h"
 #include "mmap_inputsource-inl.h"
+#include "jbig2-inl.h"
 #include "pipeline.h"
 #include "utils.h"
 #include "gsl.h"
@@ -473,6 +474,8 @@ void save_pdf(QPDF &q,
 
 void init_qpdf(py::module_ &m)
 {
+    QPDF::registerStreamFilter("/JBIG2Decode", &JBIG2StreamFilter::factory);
+
     py::enum_<qpdf_object_stream_e>(m, "ObjectStreamMode")
         .value("disable", qpdf_object_stream_e::qpdf_o_disable)
         .value("preserve", qpdf_object_stream_e::qpdf_o_preserve)
-- 
cgit v1.2.3