diff options
author | James R. Barlow <james@purplerock.ca> | 2021-08-22 01:24:13 -0700 |
---|---|---|
committer | James R. Barlow <james@purplerock.ca> | 2021-08-22 01:24:13 -0700 |
commit | e825652e46ebae716afde78ed2c74e18f13e4970 (patch) | |
tree | 031525aacb78bc27386912ad1a595d11bb7d6ee4 /src | |
parent | 1765463b4c744f3c0140c2dff0a63bdca8f8febf (diff) |
Add ContentStreamInlineImage alongside ContentStreamInstruction
Diffstat (limited to 'src')
-rw-r--r-- | src/pikepdf/models/image.py | 2 | ||||
-rw-r--r-- | src/qpdf/parsers.cpp | 103 | ||||
-rw-r--r-- | src/qpdf/parsers.h | 19 |
3 files changed, 85 insertions, 39 deletions
diff --git a/src/pikepdf/models/image.py b/src/pikepdf/models/image.py index 4b8c61a..b2c1b69 100644 --- a/src/pikepdf/models/image.py +++ b/src/pikepdf/models/image.py @@ -895,7 +895,7 @@ class PdfInlineImage(PdfImageBase): } REVERSE_ABBREVS = {v: k for k, v in ABBREVS.items()} - def __init__(self, *, image_data, image_object: tuple): + def __init__(self, *, image_data: Object, image_object: tuple): """ Args: image_data: data stream for image, extracted from content stream diff --git a/src/qpdf/parsers.cpp b/src/qpdf/parsers.cpp index 0b83834..dcc8bae 100644 --- a/src/qpdf/parsers.cpp +++ b/src/qpdf/parsers.cpp @@ -47,9 +47,41 @@ std::ostream &operator<<(std::ostream &os, ContentStreamInstruction &csi) return os; } -// ObjectList ContentStreamInstruction::getOperands() { return this->operands; } +ContentStreamInlineImage::ContentStreamInlineImage( + ObjectList image_metadata, QPDFObjectHandle image_data) + : image_metadata(image_metadata), image_data(image_data) +{ +} + +py::object ContentStreamInlineImage::get_inline_image() const +{ + auto PdfInlineImage = py::module_::import("pikepdf").attr("PdfInlineImage"); + auto kwargs = py::dict(); + kwargs["image_data"] = this->image_data; + kwargs["image_object"] = this->image_metadata; + auto iimage = PdfInlineImage(**kwargs); + return iimage; +} + +py::list ContentStreamInlineImage::get_operands() const +{ + auto list = py::list(); + list.append(this->get_inline_image()); + return list; +} + +QPDFObjectHandle ContentStreamInlineImage::get_operator() const +{ + return QPDFObjectHandle::newOperator("INLINE IMAGE"); +} -// QPDFObjectHandle ContentStreamInstruction::getOperator() { return this->operator_; } +std::ostream &operator<<(std::ostream &os, ContentStreamInlineImage &csii) +{ + py::bytes ii_bytes = csii.get_inline_image().attr("unparse")(); + + os << std::string(ii_bytes); + return os; +} OperandGrouper::OperandGrouper(const std::string &operators) : parsing_inline_image(false), count(0) @@ -89,24 +121,9 @@ void OperandGrouper::handleObject(QPDFObjectHandle obj) if (op == "ID") { this->inline_metadata = this->tokens; } else if (op == "EI") { - auto PdfInlineImage = - py::module_::import("pikepdf").attr("PdfInlineImage"); - auto kwargs = py::dict(); - kwargs["image_data"] = this->tokens[0]; - kwargs["image_object"] = this->inline_metadata; - auto iimage = PdfInlineImage(**kwargs); - - // Package as list with single element for consistency - auto iimage_list = py::list(); - iimage_list.append(iimage); - - auto instruction = py::make_tuple( - iimage_list, QPDFObjectHandle::newOperator("INLINE IMAGE")); - - this->instructions.append(instruction); - - this->parsing_inline_image = false; - this->inline_metadata.clear(); + ContentStreamInlineImage csii(this->inline_metadata, this->tokens[0]); + this->instructions.append(csii); + this->inline_metadata = ObjectList(); } } else { ContentStreamInstruction csi(this->tokens, obj); @@ -145,6 +162,13 @@ py::bytes unparse_content_stream(py::iterable contentstream) } catch (py::cast_error &) { } + try { + auto csii = py::cast<ContentStreamInlineImage>(item); + ss << csii; + continue; + } catch (py::cast_error &) { + } + auto operands_op = py::reinterpret_borrow<py::sequence>(item); // First iteration: print nothing @@ -188,20 +212,11 @@ py::bytes unparse_content_stream(py::iterable contentstream) py::object iimage_unparsed_bytes = iimage.attr("unparse")(); ss << std::string(py::bytes(iimage_unparsed_bytes)); } else { - // try { - // // First try direct conversion... - // auto objectlist = - // operands_op[0].cast<std::vector<QPDFObjectHandle>>(); for - // (QPDFObjectHandle &obj : objectlist) { - // ss << obj.unparseBinary() << " "; - // } - // } catch (const py::cast_error &) { auto operands = py::reinterpret_borrow<py::sequence>(operands_op[0]); for (const auto &operand : operands) { QPDFObjectHandle obj = objecthandle_encode(operand); ss << obj.unparseBinary() << " "; } - // } ss << op.unparseBinary(); } @@ -213,12 +228,8 @@ py::bytes unparse_content_stream(py::iterable contentstream) void init_parsers(py::module_ &m) { py::class_<ContentStreamInstruction>(m, "ContentStreamInstruction") - .def_property( - "operator", - [](ContentStreamInstruction &csi) { return csi.operator_; }, - [](ContentStreamInstruction &csi, QPDFObjectHandle op) { - csi.operator_ = op; - }) + .def_property_readonly( + "operator", [](ContentStreamInstruction &csi) { return csi.operator_; }) .def_property( "operands", [](ContentStreamInstruction &csi) { return csi.operands; }, @@ -245,4 +256,26 @@ void init_parsers(py::module_ &m) .def("__repr__", [](ContentStreamInstruction &csi) { return "pikepdf.ContentStreamInstruction()"; }); + + py::class_<ContentStreamInlineImage>(m, "ContentStreamInlineImage") + .def_property_readonly("operator", + [](ContentStreamInlineImage &csii) { + return QPDFObjectHandle::newOperator("INLINE IMAGE"); + }) + .def_property_readonly("operands", + [](ContentStreamInlineImage &csii) { return csii.get_operands(); }) + .def("__getitem__", + [](ContentStreamInlineImage &csii, int index) -> py::object { + if (index == 0 || index == -2) + return csii.get_operands(); + else if (index == 1 || index == -1) + return py::cast(csii.get_operator()); + throw py::index_error( + std::string("Invalid index ") + std::to_string(index)); + }) + .def_property_readonly("iimage", + [](ContentStreamInlineImage &csii) { return csii.get_inline_image(); }) + .def("__repr__", [](ContentStreamInlineImage &csii) { + return "pikepdf.ContentStreamInstruction()"; + }); }
\ No newline at end of file diff --git a/src/qpdf/parsers.h b/src/qpdf/parsers.h index 695c7ad..7cd7600 100644 --- a/src/qpdf/parsers.h +++ b/src/qpdf/parsers.h @@ -30,15 +30,28 @@ public: ContentStreamInstruction(ObjectList operands, QPDFObjectHandle operator_); virtual ~ContentStreamInstruction() = default; - // ObjectList getOperands(); - // QPDFObjectHandle getOperator(); - friend std::ostream &operator<<(std::ostream &os, ContentStreamInstruction &csi); ObjectList operands; QPDFObjectHandle operator_; }; +class ContentStreamInlineImage { +public: + ContentStreamInlineImage(ObjectList image_metadata, QPDFObjectHandle image_data); + virtual ~ContentStreamInlineImage() = default; + + friend std::ostream &operator<<(std::ostream &os, ContentStreamInstruction &csi); + + ObjectList image_metadata; + QPDFObjectHandle image_data; + + py::list get_operands() const; + QPDFObjectHandle get_operator() const; + + py::object get_inline_image() const; +}; + // Used for parse_content_stream. Handles each object by grouping into operands // and operators. The whole parse stream can be retrived at once. class OperandGrouper : public QPDFObjectHandle::ParserCallbacks { |