summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author James R. Barlow <james@purplerock.ca> 2021-08-22 01:24:13 -0700
committer James R. Barlow <james@purplerock.ca> 2021-08-22 01:24:13 -0700
commit e825652e46ebae716afde78ed2c74e18f13e4970 (patch)
tree 031525aacb78bc27386912ad1a595d11bb7d6ee4 /src
parent 1765463b4c744f3c0140c2dff0a63bdca8f8febf (diff)
Add ContentStreamInlineImage alongside ContentStreamInstruction
Diffstat (limited to 'src')
-rw-r--r-- src/pikepdf/models/image.py 2
-rw-r--r-- src/qpdf/parsers.cpp 103
-rw-r--r-- src/qpdf/parsers.h 19
3 files changed, 85 insertions, 39 deletions
diff --git a/src/pikepdf/models/image.py b/src/pikepdf/models/image.py
index 4b8c61a..b2c1b69 100644
--- a/src/pikepdf/models/image.py
+++ b/src/pikepdf/models/image.py
@@ -895,7 +895,7 @@ class PdfInlineImage(PdfImageBase):
}
REVERSE_ABBREVS = {v: k for k, v in ABBREVS.items()}
- def __init__(self, *, image_data, image_object: tuple):
+ def __init__(self, *, image_data: Object, image_object: tuple):
"""
Args:
image_data: data stream for image, extracted from content stream
diff --git a/src/qpdf/parsers.cpp b/src/qpdf/parsers.cpp
index 0b83834..dcc8bae 100644
--- a/src/qpdf/parsers.cpp
+++ b/src/qpdf/parsers.cpp
@@ -47,9 +47,41 @@ std::ostream &operator<<(std::ostream &os, ContentStreamInstruction &csi)
return os;
}
-// ObjectList ContentStreamInstruction::getOperands() { return this->operands; }
+// Construct an inline-image instruction, storing copies of the image
+// metadata object list and of the image data object.
+ContentStreamInlineImage::ContentStreamInlineImage(
+    ObjectList image_metadata, QPDFObjectHandle image_data)
+    : image_metadata(image_metadata),
+      image_data(image_data)
+{
+}
+
+// Build a pikepdf.PdfInlineImage Python object from the stored image data
+// and metadata. A new Python object is constructed on every call.
+py::object ContentStreamInlineImage::get_inline_image() const
+{
+    auto pikepdf_module = py::module_::import("pikepdf");
+
+    // PdfInlineImage takes keyword-only arguments, so pass them as **kwargs.
+    py::dict ctor_kwargs;
+    ctor_kwargs["image_data"] = this->image_data;
+    ctor_kwargs["image_object"] = this->image_metadata;
+
+    return pikepdf_module.attr("PdfInlineImage")(**ctor_kwargs);
+}
+
+// Return the operands as a Python list whose single element is the
+// PdfInlineImage produced by get_inline_image().
+py::list ContentStreamInlineImage::get_operands() const
+{
+    py::list operands;
+    operands.append(this->get_inline_image());
+    return operands;
+}
+
+// Return a newly created operator object named "INLINE IMAGE".
+QPDFObjectHandle ContentStreamInlineImage::get_operator() const
+{
+    QPDFObjectHandle inline_image_op = QPDFObjectHandle::newOperator("INLINE IMAGE");
+    return inline_image_op;
+}
-// QPDFObjectHandle ContentStreamInstruction::getOperator() { return this->operator_; }
+// Write an inline image to the stream: build the PdfInlineImage, call its
+// Python-side unparse() method and emit the resulting bytes.
+std::ostream &operator<<(std::ostream &os, ContentStreamInlineImage &csii)
+{
+    py::object inline_image = csii.get_inline_image();
+    py::bytes unparsed = inline_image.attr("unparse")();
+    return os << std::string(unparsed);
+}
OperandGrouper::OperandGrouper(const std::string &operators)
: parsing_inline_image(false), count(0)
@@ -89,24 +121,9 @@ void OperandGrouper::handleObject(QPDFObjectHandle obj)
if (op == "ID") {
this->inline_metadata = this->tokens;
} else if (op == "EI") {
- auto PdfInlineImage =
- py::module_::import("pikepdf").attr("PdfInlineImage");
- auto kwargs = py::dict();
- kwargs["image_data"] = this->tokens[0];
- kwargs["image_object"] = this->inline_metadata;
- auto iimage = PdfInlineImage(**kwargs);
-
- // Package as list with single element for consistency
- auto iimage_list = py::list();
- iimage_list.append(iimage);
-
- auto instruction = py::make_tuple(
- iimage_list, QPDFObjectHandle::newOperator("INLINE IMAGE"));
-
- this->instructions.append(instruction);
-
- this->parsing_inline_image = false;
- this->inline_metadata.clear();
+ ContentStreamInlineImage csii(this->inline_metadata, this->tokens[0]);
+ this->instructions.append(csii);
+ this->inline_metadata = ObjectList();
}
} else {
ContentStreamInstruction csi(this->tokens, obj);
@@ -145,6 +162,13 @@ py::bytes unparse_content_stream(py::iterable contentstream)
} catch (py::cast_error &) {
}
+ try {
+ auto csii = py::cast<ContentStreamInlineImage>(item);
+ ss << csii;
+ continue;
+ } catch (py::cast_error &) {
+ }
+
auto operands_op = py::reinterpret_borrow<py::sequence>(item);
// First iteration: print nothing
@@ -188,20 +212,11 @@ py::bytes unparse_content_stream(py::iterable contentstream)
py::object iimage_unparsed_bytes = iimage.attr("unparse")();
ss << std::string(py::bytes(iimage_unparsed_bytes));
} else {
- // try {
- // // First try direct conversion...
- // auto objectlist =
- // operands_op[0].cast<std::vector<QPDFObjectHandle>>(); for
- // (QPDFObjectHandle &obj : objectlist) {
- // ss << obj.unparseBinary() << " ";
- // }
- // } catch (const py::cast_error &) {
auto operands = py::reinterpret_borrow<py::sequence>(operands_op[0]);
for (const auto &operand : operands) {
QPDFObjectHandle obj = objecthandle_encode(operand);
ss << obj.unparseBinary() << " ";
}
- // }
ss << op.unparseBinary();
}
@@ -213,12 +228,8 @@ py::bytes unparse_content_stream(py::iterable contentstream)
void init_parsers(py::module_ &m)
{
py::class_<ContentStreamInstruction>(m, "ContentStreamInstruction")
- .def_property(
- "operator",
- [](ContentStreamInstruction &csi) { return csi.operator_; },
- [](ContentStreamInstruction &csi, QPDFObjectHandle op) {
- csi.operator_ = op;
- })
+ .def_property_readonly(
+ "operator", [](ContentStreamInstruction &csi) { return csi.operator_; })
.def_property(
"operands",
[](ContentStreamInstruction &csi) { return csi.operands; },
@@ -245,4 +256,26 @@ void init_parsers(py::module_ &m)
.def("__repr__", [](ContentStreamInstruction &csi) {
return "pikepdf.ContentStreamInstruction()";
});
+
+    // Python binding for ContentStreamInlineImage. Exposes read-only
+    // "operands" (a one-element list holding the PdfInlineImage), "operator"
+    // (the "INLINE IMAGE" operator object) and "iimage" (the PdfInlineImage).
+    py::class_<ContentStreamInlineImage>(m, "ContentStreamInlineImage")
+        .def_property_readonly("operator",
+            [](ContentStreamInlineImage &csii) { return csii.get_operator(); })
+        .def_property_readonly("operands",
+            [](ContentStreamInlineImage &csii) { return csii.get_operands(); })
+        // Tuple-style access: [0] / [-2] -> operands, [1] / [-1] -> operator;
+        // any other index raises IndexError.
+        .def("__getitem__",
+            [](ContentStreamInlineImage &csii, int index) -> py::object {
+                if (index == 0 || index == -2)
+                    return csii.get_operands();
+                else if (index == 1 || index == -1)
+                    return py::cast(csii.get_operator());
+                throw py::index_error(
+                    std::string("Invalid index ") + std::to_string(index));
+            })
+        .def_property_readonly("iimage",
+            [](ContentStreamInlineImage &csii) { return csii.get_inline_image(); })
+        // Report this class's own name rather than ContentStreamInstruction's.
+        .def("__repr__", [](ContentStreamInlineImage &csii) {
+            return "pikepdf.ContentStreamInlineImage()";
+        });
} \ No newline at end of file
diff --git a/src/qpdf/parsers.h b/src/qpdf/parsers.h
index 695c7ad..7cd7600 100644
--- a/src/qpdf/parsers.h
+++ b/src/qpdf/parsers.h
@@ -30,15 +30,28 @@ public:
ContentStreamInstruction(ObjectList operands, QPDFObjectHandle operator_);
virtual ~ContentStreamInstruction() = default;
- // ObjectList getOperands();
- // QPDFObjectHandle getOperator();
-
friend std::ostream &operator<<(std::ostream &os, ContentStreamInstruction &csi);
ObjectList operands;
QPDFObjectHandle operator_;
};
+// One inline image from a content stream: the image metadata object list
+// plus the image data object, convertible to a Python PdfInlineImage.
+class ContentStreamInlineImage {
+public:
+    ContentStreamInlineImage(ObjectList image_metadata, QPDFObjectHandle image_data);
+    virtual ~ContentStreamInlineImage() = default;
+
+    // Stream insertion for this class. The parameter type must be
+    // ContentStreamInlineImage so that this declares the overload defined
+    // in parsers.cpp.
+    friend std::ostream &operator<<(std::ostream &os, ContentStreamInlineImage &csii);
+
+    ObjectList image_metadata;
+    QPDFObjectHandle image_data;
+
+    // One-element Python list holding the PdfInlineImage.
+    py::list get_operands() const;
+    // Operator object named "INLINE IMAGE".
+    QPDFObjectHandle get_operator() const;
+
+    // Build the pikepdf.PdfInlineImage Python object for this image.
+    py::object get_inline_image() const;
+};
+
// Used for parse_content_stream. Handles each object by grouping into operands
// and operators. The whole parse stream can be retrieved at once.
class OperandGrouper : public QPDFObjectHandle::ParserCallbacks {