summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGES.rst16
-rw-r--r--PKG-INFO34
-rw-r--r--README.md27
-rw-r--r--debian/changelog12
-rw-r--r--debian/patches/disable-gui.patch4
-rw-r--r--debian/patches/imagemagick-issue-28525
-rw-r--r--debian/patches/remove-exact-cmyk8.patch85
-rw-r--r--debian/patches/series2
-rw-r--r--debian/upstream/signing-key.asc110
-rw-r--r--debian/watch2
-rw-r--r--setup.py2
-rw-r--r--src/img2pdf.egg-info/PKG-INFO34
-rw-r--r--src/img2pdf.egg-info/SOURCES.txt1
-rw-r--r--src/img2pdf.egg-info/entry_points.txt1
-rw-r--r--src/img2pdf.egg-info/pbr.json1
-rwxr-xr-xsrc/img2pdf.py546
-rwxr-xr-xsrc/img2pdf_test.py743
-rw-r--r--src/jp2.py55
18 files changed, 1321 insertions, 379 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index 9d6b3f7..df714cf 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,22 @@
CHANGES
=======
+0.5.1 (2023-11-26)
+------------------
+
+ - no default ICC profile location for PDF/A-1b on Windows
+ - workaround for PNG input without dpi units but non-square dpi aspect ratio
+
+0.5.0 (2023-10-28)
+------------------
+
+ - support MIFF for 16 bit CMYK input
+ - accept pathlib.Path objects as input
+ - don't store RGB ICC profiles from bilevel or grayscale TIFF, PNG and JPEG
+ - thumbnails are no longer included by default and --include-thumbnails has to
+ be used if you want them
+ - support for pikepdf (>= 6.2.0)
+
0.4.4 (2022-04-07)
------------------
diff --git a/PKG-INFO b/PKG-INFO
index 4b1c86a..cc05494 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,14 +1,13 @@
Metadata-Version: 2.1
Name: img2pdf
-Version: 0.4.4
+Version: 0.5.1
Summary: Convert images to PDF via direct JPEG inclusion.
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf
+Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.5.1
Author: Johannes Schauer Marin Rodrigues
Author-email: josch@mister-muffin.de
License: LGPL
-Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.4.4
Keywords: jpeg pdf converter
-Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Other Audience
@@ -144,10 +143,9 @@ You can then test the converter using:
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
-For Microsoft Windows users, PyInstaller based .exe files are produced by
-appveyor. If you don't want to install Python before using img2pdf you can head
-to appveyor and click on "Artifacts" to download the latest version:
-https://ci.appveyor.com/project/josch/img2pdf
+If you don't want to set up Python on Windows, then head to the
+[releases](/josch/img2pdf/releases) section and download the latest
+`img2pdf.exe`.
GUI
---
@@ -174,6 +172,10 @@ The package can also be used as a library:
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
f1.write(img2pdf.convert(f2))
+ # opening using pathlib
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(pathlib.Path('test.jpg')))
+
# using in-memory image data
with open("name.pdf","wb") as f:
f.write(img2pdf.convert("\x89PNG...")
@@ -216,6 +218,11 @@ The package can also be used as a library:
with open("name.pdf","wb") as f:
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
+ # convert all files matching a glob using pathlib.Path
+ from pathlib import Path
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
+
# ignore invalid rotation values in the input images
with open("name.pdf","wb") as f:
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
@@ -327,5 +334,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
input to RGB and removes the alpha channel from images with transparency. For
multipage TIFF or animated GIF, it will only convert the first frame.
-
-
+Comparison to econvert from ExactImage
+--------------------------------------
+
+Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
+directly without re-encoding but when given other file formats, it stores them
+just using flate compression, which unnecessarily increases the filesize.
+Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
+jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
+to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
+also seems to choose JPEG encoding when using it on some file types (like
+palette images) making it again not lossless for that input as well.
diff --git a/README.md b/README.md
index 1bca7f2..8d33a36 100644
--- a/README.md
+++ b/README.md
@@ -117,10 +117,9 @@ You can then test the converter using:
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
-For Microsoft Windows users, PyInstaller based .exe files are produced by
-appveyor. If you don't want to install Python before using img2pdf you can head
-to appveyor and click on "Artifacts" to download the latest version:
-https://ci.appveyor.com/project/josch/img2pdf
+If you don't want to set up Python on Windows, then head to the
+[releases](/josch/img2pdf/releases) section and download the latest
+`img2pdf.exe`.
GUI
---
@@ -147,6 +146,10 @@ The package can also be used as a library:
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
f1.write(img2pdf.convert(f2))
+ # opening using pathlib
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(pathlib.Path('test.jpg')))
+
# using in-memory image data
with open("name.pdf","wb") as f:
f.write(img2pdf.convert("\x89PNG...")
@@ -189,6 +192,11 @@ The package can also be used as a library:
with open("name.pdf","wb") as f:
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
+ # convert all files matching a glob using pathlib.Path
+ from pathlib import Path
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
+
# ignore invalid rotation values in the input images
with open("name.pdf","wb") as f:
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
@@ -300,3 +308,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
input to RGB and removes the alpha channel from images with transparency. For
multipage TIFF or animated GIF, it will only convert the first frame.
+Comparison to econvert from ExactImage
+--------------------------------------
+
+Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
+directly without re-encoding but when given other file formats, it stores them
+just using flate compression, which unnecessarily increases the filesize.
+Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
+jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
+to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
+also seems to choose JPEG encoding when using it on some file types (like
+palette images) making it again not lossless for that input as well.
diff --git a/debian/changelog b/debian/changelog
index 629c724..89dbb07 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,15 @@
+img2pdf (0.5.1-1) unstable; urgency=medium
+
+ * New upstream version 0.5.1
+ - patches for imagemagick in unstable (closes: #1054762)
+ * refresh patches
+ * add debian/patches/remove-exact-cmyk8.patch
+ * add debian/patches/imagemagick-issue-285
+ * delete debian/upstream/signing-key.asc as pypi stopped shipping gpg
+ signatures
+
+ -- Johannes Schauer Marin Rodrigues <josch@debian.org> Sun, 26 Nov 2023 23:17:11 +0100
+
img2pdf (0.4.4-4) unstable; urgency=medium
* add debian/source/options to support running 'dpkg-buildpackage -S' after
diff --git a/debian/patches/disable-gui.patch b/debian/patches/disable-gui.patch
index d573b00..341d64a 100644
--- a/debian/patches/disable-gui.patch
+++ b/debian/patches/disable-gui.patch
@@ -4,7 +4,7 @@ Forwarded: not-needed
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
-@@ -3517,9 +3517,6 @@ Report bugs at https://gitlab.mister-muf
+@@ -3989,9 +3989,6 @@ Report bugs at https://gitlab.mister-muf
help="Prints version information and exits.",
)
parser.add_argument(
@@ -14,7 +14,7 @@ Forwarded: not-needed
"--from-file",
metavar="FILE",
type=from_file,
-@@ -3912,10 +3909,6 @@ and left/right, respectively. It is not
+@@ -4406,10 +4403,6 @@ def main(argv=sys.argv):
if args.pillow_limit_break:
Image.MAX_IMAGE_PIXELS = None
diff --git a/debian/patches/imagemagick-issue-285 b/debian/patches/imagemagick-issue-285
new file mode 100644
index 0000000..28a480f
--- /dev/null
+++ b/debian/patches/imagemagick-issue-285
@@ -0,0 +1,25 @@
+Subject: https://github.com/ImageMagick/ImageMagick6/issues/285
+From: Johannes Schauer Marin Rodrigues <josch@debian.org>
+
+--- a/src/img2pdf_test.py
++++ b/src/img2pdf_test.py
+@@ -5582,6 +5582,9 @@ def test_jpg_2000(tmp_path_factory, jpg_
+ @pytest.mark.skipif(
+ not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
+ )
++@pytest.mark.skipif(
++ True, reason="https://github.com/ImageMagick/ImageMagick6/issues/285"
++)
+ def test_jpg_2000_rgba8(tmp_path_factory, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf):
+ tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba8")
+ compare_ghostscript(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+@@ -5597,6 +5600,9 @@ def test_jpg_2000_rgba8(tmp_path_factory
+ @pytest.mark.skipif(
+ not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
+ )
++@pytest.mark.skipif(
++ True, reason="https://github.com/ImageMagick/ImageMagick6/issues/285"
++)
+ def test_jpg_2000_rgba16(tmp_path_factory, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf):
+ tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba16")
+ compare_ghostscript(
diff --git a/debian/patches/remove-exact-cmyk8.patch b/debian/patches/remove-exact-cmyk8.patch
new file mode 100644
index 0000000..a0f00de
--- /dev/null
+++ b/debian/patches/remove-exact-cmyk8.patch
@@ -0,0 +1,85 @@
+Subject: remove HAVE_EXACT_CMYK8
+From: Johannes Schauer Marin Rodrigues <josch@debian.org>
+
+--- a/src/img2pdf_test.py
++++ b/src/img2pdf_test.py
+@@ -85,7 +85,6 @@ for prog in ["convert", "compare", "iden
+ globals()[prog.upper()] = ["magick", prog]
+
+ HAVE_IMAGEMAGICK_MODERN = True
+-HAVE_EXACT_CMYK8 = True
+ try:
+ ver = subprocess.check_output(CONVERT + ["-version"], stderr=subprocess.STDOUT)
+ m = re.fullmatch(
+@@ -93,18 +92,13 @@ try:
+ )
+ if m is None:
+ HAVE_IMAGEMAGICK_MODERN = False
+- HAVE_EXACT_CMYK8 = False
+ else:
+ if parse_version(m.group(1)) < parse_version("6.9.10-12"):
+ HAVE_IMAGEMAGICK_MODERN = False
+- if parse_version(m.group(1)) < parse_version("7.1.0-48"):
+- HAVE_EXACT_CMYK8 = False
+ except FileNotFoundError:
+ HAVE_IMAGEMAGICK_MODERN = False
+- HAVE_EXACT_CMYK8 = False
+ except subprocess.CalledProcessError:
+ HAVE_IMAGEMAGICK_MODERN = False
+- HAVE_EXACT_CMYK8 = False
+
+ if not HAVE_IMAGEMAGICK_MODERN:
+ warnings.warn("imagemagick >= 6.9.10-12 not available, skipping certain checks...")
+@@ -351,9 +345,7 @@ def write_png(data, path, bitdepth, colo
+
+ def compare(im1, im2, exact, icc, cmyk):
+ if exact:
+- if cmyk and not HAVE_EXACT_CMYK8:
+- raise Exception("cmyk cannot be exact before ImageMagick 7.1.0-48")
+- elif icc:
++ if icc:
+ raise Exception("icc cannot be exact")
+ else:
+ subprocess.check_call(
+@@ -5562,11 +5554,9 @@ def test_jpg_rot(tmp_path_factory, jpg_r
+ )
+ def test_jpg_cmyk(tmp_path_factory, jpg_cmyk_img, jpg_cmyk_pdf):
+ tmpdir = tmp_path_factory.mktemp("jpg_cmyk")
+- compare_ghostscript(
+- tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, gsdevice="tiff32nc", exact=HAVE_EXACT_CMYK8
+- )
++ compare_ghostscript(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, gsdevice="tiff32nc")
+ # not testing with poppler as it cannot write CMYK images
+- compare_mupdf(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, exact=HAVE_EXACT_CMYK8, cmyk=True)
++ compare_mupdf(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, cmyk=True)
+ compare_pdfimages_cmyk(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf)
+
+
+@@ -5957,12 +5947,9 @@ def test_tiff_cmyk8(tmp_path_factory, ti
+ tiff_cmyk8_img,
+ tiff_cmyk8_pdf,
+ gsdevice="tiff32nc",
+- exact=HAVE_EXACT_CMYK8,
+ )
+ # not testing with poppler as it cannot write CMYK images
+- compare_mupdf(
+- tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf, exact=HAVE_EXACT_CMYK8, cmyk=True
+- )
++ compare_mupdf(tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf, cmyk=True)
+ compare_pdfimages_tiff(tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf)
+
+
+@@ -6403,11 +6390,9 @@ def test_tiff_ccitt_nometa2(
+ )
+ def test_miff_cmyk8(tmp_path_factory, miff_cmyk8_img, tiff_cmyk8_img, miff_cmyk8_pdf):
+ tmpdir = tmp_path_factory.mktemp("miff_cmyk8")
+- compare_ghostscript(
+- tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, gsdevice="tiff32nc", exact=False
+- )
++ compare_ghostscript(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, gsdevice="tiff32nc")
+ # not testing with poppler as it cannot write CMYK images
+- compare_mupdf(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, exact=False, cmyk=True)
++ compare_mupdf(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, cmyk=True)
+ compare_pdfimages_tiff(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf)
+
+
diff --git a/debian/patches/series b/debian/patches/series
index b05974a..80f3c3b 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1 +1,3 @@
disable-gui.patch
+remove-exact-cmyk8.patch
+imagemagick-issue-285
diff --git a/debian/upstream/signing-key.asc b/debian/upstream/signing-key.asc
deleted file mode 100644
index 3bd21f3..0000000
--- a/debian/upstream/signing-key.asc
+++ /dev/null
@@ -1,110 +0,0 @@
------BEGIN PGP PUBLIC KEY BLOCK-----
-
-mQINBFHVWP0BEAC4vnKRkDgoQ4JrRHhDrKipbs4I0xwRSDHlhnD1bsa12PNaJytH
-HUufulM5woChwGPFOH0Ex0eOzFWzQ1cHmijIIdm5h9tGSxQK+AF5lh2q9/ae1SXW
-bh9u+6u8PWS1P9nxXMCN9c4ahwUb5YYCH2ThkmlhzvAeX0/hk85zecglsypUfQgO
-9tp72S8CX/Lx0HX0at7xEioKgA39/ZWD4b7FktI3MX+UYMgOXsgsWqmY2gMGUp3E
-3Aa6se6/63nhY3HLCCHUYS3pxP7Cnw5fI3/KJ9yBSGQ8LoNwijJtJD0XWTaUikKy
-+MrifZDpfFIxvo/JJJLOXTi7nEnXZitKV5pz49/6CkhbSdAt423honj+Gn58viUw
-pyMjpfCaZu4/RN8GLDMvlz2etst0HHnINIwQWPrXLubF3jqe8uhseKATO7FkgaOw
-4o6xC+NZuycT7pXsb6m51y/TZfAq/eTP0TE8jMSVf1dpMoyLOcI4VciL26G7uujQ
-qjdBVIgcPnv7XG9y+HqHX49pvTRo0Sum21LpbZRDCeRtYe8flbBvHzM+B+S93xSn
-uppct4BFrKJ2RU7QCIpDOBvfP24cy9Nu+AphScXw5FtQOzKfz3+kosPz3uu0XCUt
-xX7h8s00dT4hQQX/bMwjqa2WJNnoaqg5oIPMRNkZHPuPNcsdobSy3KJuWwARAQAB
-tClKb2hhbm5lcyBTY2hhdWVyIDxqb3NjaEBtaXN0ZXItbXVmZmluLmRlPokCOgQT
-AQgAJAIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAUCUdVbIQIZAQAKCRB9XYxg
-z00+tHLgD/4xU2nGfP187CQm4d4cZG7maaLSWeyffY2UMgBYwppioIv99hPWFV8j
-ePsBsAYGXH0TWaci+mLAiYveirGFnl4atwc9+YvWE5J1zot6nE6cFdv7YM6X4WaO
-/5zjkI0uXXOckfDrT8HbTDhQvyrkLL3NB728lHkPZrTFxrmCJkWZb9zU2X4Mbp/M
-Rhei1OWZu3FiOxc7or5dbfWL+t8XEmQ/CrO96f2rHOjcuk+o//4xcm1uDFbKS385
-OjtE6rcDowWvDGvd1IYIyFdXinCX0vZcTSrsgUknMHpOb//ucKiSC9tjNQlqXyAx
-yok7KAyJrXRflY0ja2EEVywfWaw99H5o5tQq5utNnG/XitS98qEk4w8P6OC4FInr
-uNgxyTcBFBAE/M2HnRCRW+1Tui1KB0gKAY09NhxX++6EbaetXqjiHBLnNBcOaSWZ
-FMjzo/qn7YqRljAlLwHACK03J8yvbcIfFW4uTBpmE2+Y8vR+JaegW2+Xh94OVPeH
-Q8y8GRkCwkZxBxNIZGjjm9MWuccGHxQtzU6balfYyAgoC1HvnSw8parqRW6kkl7h
-TBJVcbJkJ37XRNVtRGPmdg98xrAk2MUZOFMOKBXFQW4e3a3KNRO8zQh7x/tmfG/R
-ti3yIjpHLhGrkwaMOUdAz/Fh43ZNcR8mI2PIF7Aj4eA+/B/dyua9DbQlSm9oYW5u
-ZXMgU2NoYXVlciA8ai5zY2hhdWVyQGVtYWlsLmRlPokCNwQTAQgAIQUCUdVa4QIb
-AwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRB9XYxgz00+tDVlD/9BqE0Kde9M
-oLIBkE2pOtS59yZRQ+omL2twIbLx58cyH1WY+iTtgzctJU2nbbBs2aW/pMVTuSUg
-qEnwM+BO/OM+ltFdwuVbZeO3eTLjkDKbArqnx49os4/TsvgLLzo+VbMRZcuo1C2g
-yJ52/KlQ8pWIr1UoIpNKvIdFTCC96mYZMxiLffYfKnCCOz5SOoYy9atzOZ8CAZOo
-iZSk+ofoR+XP85qWr5/myi6jUV8vzKJdPDDjsFrZhKZNVkDNhfox5epk+nxej+ga
-nF8GPTKA/0IYxbo3Ofbf7ecIBwVtI7U9SzT2TExex++ZlTF3f4WjhzMNX17zD02x
-HFc02GGSbdNzhZAYOZn4hM0qgMkZL7VR1ZT4x89qH58pnzv87VHT5Jfj1r++pMLO
-sewbZAbmMj97EVHTXgZ2VSBza7s1nTd2Te1v/NNnkHIPg096/LdKIRSwZMbR/nN/
-ax5KCqZIVFCFvlPjEXPfCs5Djw93p8c07twG4O1HxnF6cms0LoUIuy86+4o5eq97
-RufcLwSLmu2k5ab4ez89+ZI87yD+70KnixzYA3WKUFmi/ClvGaMS3/N6+th03TkC
-xiiYA9ZJyyKssO2WgiII3vFZup1dZkDFWjje5aBhtGyFJikWsPJjT203ZuxvqJ5Z
-2S+driyy3Vc6kxdiTSs0sqTULKAdmlzSybQjSm9oYW5uZXMgU2NoYXVlciA8am9z
-Y2hAZGViaWFuLm9yZz6JAjcEEwEIACEFAlTSH1oCGwMFCwkIBwMFFQoJCAsFFgID
-AQACHgECF4AACgkQfV2MYM9NPrRBJxAAheF1zMIsWRSMtmZaTe0gr7f5pjcxaD2a
-CaeVQg5PAlx+cLF0/AYVbFDVzAXQq+jefgnddqw897ZAL8SsCUIcTLurvO3q4qx1
-hg+kqhVzFyji3S49i3jlB4ozmA1VMZKGhqzLDN/UYIXwfvkLmp4TVw0KMy7Fmmrl
-aIYcRexBsyf8u3r6OuorPJHRxxN2cIMWLEtXhOYSk8tOzMpKBgj5Ki4g6ORW5+3T
-SBTBpwlZAuRb5cGmiiRdgc/R/gTGlMYJr2OB/s/38n1K8qvcS2JBXfN29MAJDeDR
-kNKiI3Xe4S3f7bdcuPBuh45OFWM/vOt1jhsqzVR0EFb74ok4M/f3mnKeZzMlVnGd
-XoePzx6Z4Ocva029jaD8utnl66Pd/6O8lqwpriDurnFQ495zchWssn0RCcv0CmkK
-8qj9+ru6TOyAarBKbu9C1hDlIGkBaYlM/jVm+e+anoki8nbulWG4YHH5KykQWyW4
-KjqI1k3XItLZJzYUJv2xLmYlAo+5f/aWh5aNuorGOGaKg8FIFyUysnePxSHmeL33
-kP6GO16EZDUNsrJMkFo0qfMyJm3wrDTqe4BzYMj3lAT2FQ1PvpuLuQdnN+iplUWs
-XOr14BJfBMbr5Gdy9ADOL5RvkY/5zEZDAI2OGgOCyB6ut8j+F7IPjaKOWaRxjsBq
-UvodwoeUt6W5Ag0EUdVY/QEQANiGeE/TTLZt7x/14zmvM+6vRh4VpX4eovu74RMp
-n62hVE17vTuCRTh+1IiFWb3URT9ciQsqWdJxHE4omJWTHSZJP/QnCYgqBjBZqQZ3
-TLo+B46f5ZDgF7Sojc1HcNIL2gE3ZWubQfFNjnJF3nhLh6L+mql9k4I8Nfy9Q0P/
-+9ob/Ze+AQuG+ZKS4AKb/QO6D+jePonSS49F6cbCZWQy0gQAAGHKmCgWzJ7Xy464
-6+KCMO2a8+4N5TSh3g7nemMS58Mx7mX7g9ong/v7r5/ee/04xcxdcrzbvIqf+6+s
-y/NOkwCf53SpuEorlKNfkoiow99FBjqlGFI/x9J7lTKdxwBmUkQPVVeFJtm/edr+
-kH/sfG3Vc+eNgniLdiQC/BUeTA5jHKOMp6rJmfwsOS3N82EktL28ztCdxYWN+EQM
-twQ39E0f/jYYqWpRflobvjhkTLO1flyNH9rbW3OEdF/tGBnJVm3+bqz2ZjGfUBwM
-X4neID/2mh7BBIBe4wAljUq3AXLeKlIHwBcoNHN8bIBI371rwgMK0sjeAxrQ8awN
-YRz4G3WaU5OLxZOBqbD3t3S2fbgI3LXmBtn+c6OlVRyKDkVyrh7uvtlN5kW9zdTA
-ri1gvADd2mDPSU+DzlTyPXiMb5IlwG/nglVo3beCygfUFi62gPR+eAMPsq4r/yly
-+y3XABEBAAGJAh8EGAEIAAkFAlHVWP0CGwwACgkQfV2MYM9NPrSrRRAAsOFu2Cfz
-mDbbMrgtXzBSS2hgD/MD+E62dDcEw6nO5HvSwBET1SmaEF7D9Lt9rQYsCEZew4A1
-b8bnUzwDfM/3DMZdw/j8HH6pJtjHjfQLNMql4adgQ/kCESXtJAB00lr6AaFAaiQH
-A09l4TxprugoHfi+iriZWaq35YCIIDRnnXeaMJxRb9I0eCT7d4LKsDBd1xrxDc1T
-Uf+AQluYoowmVaicuCLXkKenLMPmU+goXfDPPldjpVyDKiWjDCqwXnZdinILSWkR
-go3s0BK8sP8cG4LY6hlG+56u6RolCp1AOXf6qsO25XsYgsTdazOtd1o2YPxMtbeL
-ZC+0xwTpfxz1uN2TP0FkIM1gPOOp5MnVZf1S0R39uRZn0rzby3ExoBaHgFDiqUOG
-YPuiL/HFKcatLt728TQVw/ylZdgOSBm293vhAlHS+gUch6DuAGErQDHUt0IaWZuR
-L/Z9S82hu3yomD14JL6sM7qdFLygooQKFI5xFpRDn+2IoQN3dhdQKI2WtfcqVg5M
-FKMPD6oIDNjqkuMIHxWqRrfKQIWIglVGKhqnlUGIBWksEBOOJVad+DhZ+FXdVNQl
-R9IAbKq99Qu3ylF6FpbJ72RRh4PCG5N/6TrMkhiHiAa7qa3s/ka9EpaEWfXRy3kn
-dQ0zzLIA1ubSYO5pTm/HRqV0NxT1AFZSoOe5Ag0EUdVcRQEQALzAiJCMWIVb+rx9
-b8TeHvWkSdGX/bsNZuEwDrKaEW8DgBi5BpPGpSPaxzGphdorO0kiyWL+YE45q3x0
-n1gTPT4M3pUhEBL6l+CPFaMYQ3Zc0OKJJ41KY2k5Ot3hxtfhtYvtwiVl6mi+Mvd8
-Wq0zygYmI2w3JDSjmUHmfmloEK0zByrFaUf3+IPtP0T5Jpq5/+/+cVr5KYPuS3oV
-3NAfSJkkhF3KOouP6RkdwL1FrhQihEI4gjN7YO7aNh84+ERH5JvSl1KTwt6VaV4N
-jcjua1GUIthehSFXItp7R6XjcTvnOwvrtsBKbuyRfhRiy839GZnopKcotg1gjTc7
-scaCIkXQwCWQxpZc1oH9mJPLTdx25GtK23IkSroO6RPcnOVqWGDenwsYlN9+XCZf
-XBa4fPjlhOxHN5s+LNA6d7PspAVlzy10SicUbHsjM52K6Wob3VsfoN/tKP2Rvhv5
-ISDV0FNlMXrL5t7fehj06iyaLANnEu/svWkQREZiWiIzd44noY1ZoDYKL8PYYYbZ
-2eH1txCBypZWjse4s2waBzUpLGhZ5k/ahTodTqqcGK2p/5mhipeYj9BGk1GnZfYD
-XqIEKKDF8cnlU7wFXwaovInuhbTysLKrKlyDX0kChCZjSc249FY+uM1GnoEqys61
-ZsVhHYhKl+/wSp42r/RvIqYDB7pHABEBAAGJBD4EGAEIAAkFAlHVXEUCGwICKQkQ
-fV2MYM9NPrTBXSAEGQEIAAYFAlHVXEUACgkQ8sulx4+9g+GpTw//VoJA2lCQ7jXa
-juM0ze3zkWdye5TL5871hnFl/a3i3v9WWj+8fEva2yAjY0WVXH9/pWdoqX7rtPXg
-BJ6C96MijzWDDao9fZZkLLZVuyNyNakB02sceuXIC50mo/OnmNDUvTh9WL1w6z8E
-LMdNw/Vd3030GxQ19ySc3SruDLx8qZeDzp7pE69cYmXmJa+rkH92tchQRQrxM6EN
-qN9O2ZAIEx2XAIg61D5DOvmT5SY3rRJpGApeGuPo1rpQlgxWZL+jQk0BT5Ix7Ojn
-vv2c94XYfB7sgZ24Sp0VP553fPjMu4cUXKTR5fiRL1bNWBAPTGd1t6XkUzfoUnot
-ahvqi3DJJXpn4adJMzJ54nZtjp/++Gb+GCCxhfDFZQvLXPLJfeNGciuGESVN4xl9
-o1G6sb9ohNSnDhAoR8TRDclON8I895OsQblDwGAdqMna7SwzE6hB/9K7+z82IEKJ
-nJC6zfjyD2WuMQUdORPn7MuH1ZuHakPllH+zC8llhozA87Cw95lyLA8v6LzQqXtM
-cLpG1Aqp3ncVbFplmXwGJMSBZnzOuZcgeqVUJ0J9BKVUKDOhjfnW+sc5RMN5zAW7
-L7XtTMU63QFFciSXiK41PD39waVfo9xV//urgAZbSXG6KNAcy1NjMlfV0S8Gh1qk
-jIAa3ig8JTIB53WhI2FPZXYXsx62fXHl3Q//ZI4biIKsi9BP1qFEz9hV0sSnUIt8
-r+BRoBcBhG/OzCBjestnm2zw1MFKkUcCTm9SpfvA77cEJrUPdlWH72+XkxPvhzwn
-LkWBdTm1ZOZbJvWulUOY47+JsVl7YNhTvk9eIssJ4qd0EWEmdFVDAWtyEIeymsz5
-bfiSPfq8dmSk9uTS/kuhU9bHhh404dlrpEPcgzFOVwx6mW33HcAsRtasTdqKH38i
-ZbBeaPp1mYTXGrPyG/KKs0lsAdTg0fjUqmo4K7Qa1c+a3M7gjMGejKPmBnQMNL4h
-PSK1+ejuOQRaNnbd3v9VYuFbtG/A9E36Jb00DxAGLhW/hmcsvYdZ4D48XLKymYOa
-zeRPtYJ4TsBGm+3BZzgXHnHQXRrG3iJZEeM1E+XHTnP04SDTaDCIAr03YMmK/nWO
-Q14K7QihDdmxIb3g5Qs4TXmh7B33sBtfGFla0pt0lNYPx3v85IlWOBQsDN6QLpM/
-KAEueXdSpK//Je+d3pbx2f79vD/k/1/ZeFyQ/FSFj+nJcAwmfZHOrzxhTV4ioZ8R
-va8FK3KkJGhJwW58N0IXaE3kx+f2gp4XaIBzdMl+XpUMGFaMN+knjcfiCdT+NtiO
-4ZKfMf0dEA/PJJcBgL/BaZWQznP0zZUXxkd/KDUBNZ3gjfePgZMno/vjY0YsdZC1
-G3jSKFcffBYpTE4=
-=UNfd
------END PGP PUBLIC KEY BLOCK-----
diff --git a/debian/watch b/debian/watch
index e457d4e..9ee5474 100644
--- a/debian/watch
+++ b/debian/watch
@@ -1,4 +1,4 @@
# please also check http://pypi.debian.net/img2pdf/watch
version=4
-opts=uversionmangle=s/(rc|a|b|c)/~$1/,pgpsigurlmangle=s/$/.asc/ \
+opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
https://pypi.debian.net/img2pdf/img2pdf-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/setup.py b/setup.py
index 57a34af..2b7c3e0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
import sys
from setuptools import setup
-VERSION = "0.4.4"
+VERSION = "0.5.1"
INSTALL_REQUIRES = (
"Pillow",
diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO
index 4b1c86a..cc05494 100644
--- a/src/img2pdf.egg-info/PKG-INFO
+++ b/src/img2pdf.egg-info/PKG-INFO
@@ -1,14 +1,13 @@
Metadata-Version: 2.1
Name: img2pdf
-Version: 0.4.4
+Version: 0.5.1
Summary: Convert images to PDF via direct JPEG inclusion.
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf
+Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.5.1
Author: Johannes Schauer Marin Rodrigues
Author-email: josch@mister-muffin.de
License: LGPL
-Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.4.4
Keywords: jpeg pdf converter
-Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Other Audience
@@ -144,10 +143,9 @@ You can then test the converter using:
$ ve/bin/img2pdf -o test.pdf src/tests/test.jpg
-For Microsoft Windows users, PyInstaller based .exe files are produced by
-appveyor. If you don't want to install Python before using img2pdf you can head
-to appveyor and click on "Artifacts" to download the latest version:
-https://ci.appveyor.com/project/josch/img2pdf
+If you don't want to set up Python on Windows, then head to the
+[releases](/josch/img2pdf/releases) section and download the latest
+`img2pdf.exe`.
GUI
---
@@ -174,6 +172,10 @@ The package can also be used as a library:
with open("name.pdf","wb") as f1, open("test.jpg") as f2:
f1.write(img2pdf.convert(f2))
+ # opening using pathlib
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(pathlib.Path('test.jpg')))
+
# using in-memory image data
with open("name.pdf","wb") as f:
f.write(img2pdf.convert("\x89PNG...")
@@ -216,6 +218,11 @@ The package can also be used as a library:
with open("name.pdf","wb") as f:
f.write(img2pdf.convert(glob.glob("/path/to/*.jpg")))
+ # convert all files matching a glob using pathlib.Path
+ from pathlib import Path
+ with open("name.pdf","wb") as f:
+ f.write(img2pdf.convert(*Path("/path").glob("**/*.jpg")))
+
# ignore invalid rotation values in the input images
with open("name.pdf","wb") as f:
f.write(img2pdf.convert('test.jpg'), rotation=img2pdf.Rotation.ifvalid)
@@ -327,5 +334,14 @@ Tesseract might not do a lossless conversion. For example it converts CMYK
input to RGB and removes the alpha channel from images with transparency. For
multipage TIFF or animated GIF, it will only convert the first frame.
-
-
+Comparison to econvert from ExactImage
+--------------------------------------
+
+Like pdflatex and podofoimg2pdf, econvert is able to embed JPEG images into PDF
+directly without re-encoding but when given other file formats, it stores them
+just using flate compression, which unnecessarily increases the filesize.
+Furthermore, it throws an error with CMYK TIF input. It also doesn't store CMYK
+jpeg files as CMYK but converts them to RGB, so it's not lossless. When trying
+to feed it 16bit files, it errors out with Unhandled bps/spp combination. It
+also seems to choose JPEG encoding when using it on some file types (like
+palette images) making it again not lossless for that input as well.
diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt
index 4d8a27e..5648504 100644
--- a/src/img2pdf.egg-info/SOURCES.txt
+++ b/src/img2pdf.egg-info/SOURCES.txt
@@ -11,7 +11,6 @@ src/img2pdf.egg-info/PKG-INFO
src/img2pdf.egg-info/SOURCES.txt
src/img2pdf.egg-info/dependency_links.txt
src/img2pdf.egg-info/entry_points.txt
-src/img2pdf.egg-info/pbr.json
src/img2pdf.egg-info/requires.txt
src/img2pdf.egg-info/top_level.txt
src/img2pdf.egg-info/zip-safe
diff --git a/src/img2pdf.egg-info/entry_points.txt b/src/img2pdf.egg-info/entry_points.txt
index 25efe55..e9e721b 100644
--- a/src/img2pdf.egg-info/entry_points.txt
+++ b/src/img2pdf.egg-info/entry_points.txt
@@ -6,4 +6,3 @@ img2pdf-gui = img2pdf:gui
[setuptools.installation]
eggsecutable = img2pdf:main
-
diff --git a/src/img2pdf.egg-info/pbr.json b/src/img2pdf.egg-info/pbr.json
deleted file mode 100644
index bc27bf9..0000000
--- a/src/img2pdf.egg-info/pbr.json
+++ /dev/null
@@ -1 +0,0 @@
-{"is_release": false, "git_version": "d78b2cb"} \ No newline at end of file
diff --git a/src/img2pdf.py b/src/img2pdf.py
index 8836f8d..56db773 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -22,7 +22,7 @@ import sys
import os
import zlib
import argparse
-from PIL import Image, TiffImagePlugin, GifImagePlugin
+from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@@ -36,8 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS
-from datetime import datetime
-from jp2 import parsejp2
+from datetime import datetime, timezone
+import jp2
from enum import Enum
from io import BytesIO
import logging
@@ -45,6 +45,8 @@ import struct
import platform
import hashlib
from itertools import chain
+import re
+import io
logger = logging.getLogger(__name__)
@@ -60,7 +62,7 @@ try:
except ImportError:
have_pikepdf = False
-__version__ = "0.4.4"
+__version__ = "0.5.1"
default_dpi = 96.0
papersizes = {
"letter": "8.5inx11in",
@@ -125,7 +127,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
-ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
+ImageFormat = Enum(
+ "ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
+)
PageMode = Enum("PageMode", "none outlines thumbs")
@@ -442,7 +446,7 @@ class temp_attr:
if hasattr(self.obj, self.field):
self.exists = True
self.old_value = getattr(self.obj, self.field)
- print(f"setting {self.obj}.{self.field} = {self.value}")
+ logger.debug(f"setting {self.obj}.{self.field} = {self.value}")
setattr(self.obj, self.field, self.value)
def __exit__(self, exctype, excinst, exctb):
@@ -718,7 +722,7 @@ class pdfdoc(object):
self.writer.docinfo = PdfDict(indirect=True)
def datetime_to_pdfdate(dt):
- return dt.strftime("%Y%m%d%H%M%SZ")
+ return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()]
@@ -728,7 +732,7 @@ class pdfdoc(object):
v = PdfString.encode(v)
self.writer.docinfo[getattr(PdfName, k)] = v
- now = datetime.now()
+ now = datetime.now().astimezone()
for k in ["CreationDate", "ModDate"]:
v = locals()[k.lower()]
if v is None and nodate:
@@ -748,7 +752,7 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
- return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+ return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
@@ -823,8 +827,10 @@ class pdfdoc(object):
artborder=None,
iccp=None,
):
- assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
- imgformat == ImageFormat.PNG and smaskdata is not None
+ assert (
+ color not in [Colorspace.RGBA, Colorspace.LA]
+ or (imgformat == ImageFormat.PNG and smaskdata is not None)
+ or imgformat == ImageFormat.JPEG2000
)
if self.engine == Engine.pikepdf:
@@ -848,7 +854,13 @@ class pdfdoc(object):
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
colorspace = PdfName.DeviceGray
elif color == Colorspace.RGB or color == Colorspace.RGBA:
- colorspace = PdfName.DeviceRGB
+ if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
+ # there is no DeviceRGBA and for JPXDecode it is okay to have
+ # no colorspace as the pdf reader is supposed to get this info
+ # from the jpeg2000 payload itself
+ colorspace = None
+ else:
+ colorspace = PdfName.DeviceRGB
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
colorspace = PdfName.DeviceCMYK
elif color == Colorspace.P:
@@ -919,7 +931,8 @@ class pdfdoc(object):
image[PdfName.Filter] = ofilter
image[PdfName.Width] = imgwidthpx
image[PdfName.Height] = imgheightpx
- image[PdfName.ColorSpace] = colorspace
+ if colorspace is not None:
+ image[PdfName.ColorSpace] = colorspace
image[PdfName.BitsPerComponent] = depth
smask = None
@@ -1256,8 +1269,11 @@ class pdfdoc(object):
# now write out the PDF
if self.engine == Engine.pikepdf:
+ kwargs = {}
+ if pikepdf.__version__ >= "6.2.0":
+ kwargs["deterministic_id"] = True
self.writer.save(
- outputstream, min_version=self.output_version, linearize=True
+ outputstream, min_version=self.output_version, linearize=True, **kwargs
)
elif self.engine == Engine.pdfrw:
self.writer.trailer.Info = self.writer.docinfo
@@ -1285,7 +1301,7 @@ def get_imgmetadata(
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
# this codepath gets called if the PIL installation is not able to
# handle JPEG2000 files
- imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
+ imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
if hdpi is None:
hdpi = default_dpi
@@ -1295,7 +1311,19 @@ def get_imgmetadata(
else:
imgwidthpx, imgheightpx = imgdata.size
- ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
+ ndpi = imgdata.info.get("dpi")
+ if ndpi is None:
+ # the PNG plugin of PIL adds the undocumented "aspect" field instead of
+ # the "dpi" field if the PNG pHYs chunk unit is not set to meters
+ if imgformat == ImageFormat.PNG and imgdata.info.get("aspect") is not None:
+ aspect = imgdata.info["aspect"]
+ # make sure not to go below the default dpi
+ if aspect[0] > aspect[1]:
+ ndpi = (default_dpi * aspect[0] / aspect[1], default_dpi)
+ else:
+ ndpi = (default_dpi, default_dpi * aspect[1] / aspect[0])
+ else:
+ ndpi = (default_dpi, default_dpi)
# In python3, the returned dpi value for some tiff images will
# not be an integer but a float. To make the behaviour of
# img2pdf the same between python2 and python3, we convert that
@@ -1305,7 +1333,7 @@ def get_imgmetadata(
ics = imgdata.mode
# GIF and PNG files with transparency are supported
- if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
+ if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
):
# Must check the IHDR chunk for the bit depth, because PIL would lossily
@@ -1315,6 +1343,10 @@ def get_imgmetadata(
if depth > 8:
logger.warning("Image with transparency and a bit depth of %d." % depth)
logger.warning("This is unsupported due to PIL limitations.")
+ logger.warning(
+ "If you accept a lossy conversion, you can manually convert "
+ "your images to 8 bit using `convert -depth 8` from imagemagick"
+ )
raise AlphaChannelError(
"Refusing to work with multiple >8bit channels."
)
@@ -1425,6 +1457,53 @@ def get_imgmetadata(
iccp = None
if "icc_profile" in imgdata.info:
iccp = imgdata.info.get("icc_profile")
+ # GIMP saves bilevel TIFF images and palette PNG images with only black and
+ # white in the palette with an RGB ICC profile which is useless
+ # https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
+ # and produces an error in Adobe Acrobat, so we ignore it with a warning.
+ # imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
+ # images: https://github.com/ImageMagick/ImageMagick/issues/2070
+ if iccp is not None and (
+ (color == Colorspace["1"] and imgformat == ImageFormat.TIFF)
+ or (
+ imgformat == ImageFormat.PNG
+ and color == Colorspace.P
+ and rawdata is not None
+ and parse_png(rawdata)[1]
+ in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"]
+ )
+ ):
+ with io.BytesIO(iccp) as f:
+ prf = ImageCms.ImageCmsProfile(f)
+ if (
+ prf.profile.model == "sRGB"
+ and prf.profile.manufacturer == "GIMP"
+ and prf.profile.profile_description == "GIMP built-in sRGB"
+ ):
+ if imgformat == ImageFormat.TIFF:
+ logger.warning(
+ "Ignoring RGB ICC profile in bilevel TIFF produced by GIMP."
+ )
+ elif imgformat == ImageFormat.PNG:
+ logger.warning(
+ "Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP."
+ )
+ logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
+ iccp = None
+ # SmartAlbums old version (found 2.2.6) exports JPG with only 1 component
+ # with an RGB ICC profile which is useless.
+ # This produces an error in Adobe Acrobat, so we ignore it with a warning.
+ # Update: Found another case, the JPG is created by Adobe PhotoShop, so we
+ # don't check software anymore.
+ if iccp is not None and (
+ (color == Colorspace["L"] and imgformat == ImageFormat.JPEG)
+ ):
+ with io.BytesIO(iccp) as f:
+ prf = ImageCms.ImageCmsProfile(f)
+
+ if prf.profile.xcolor_space not in ("GRAY"):
+ logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
+ iccp = None
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
@@ -1533,7 +1612,204 @@ def parse_png(rawdata):
return pngidat, palette
-def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
+miff_re = re.compile(
+ r"""
+ [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
+ [^=]+ # the field name can even contain spaces
+ = # field name and value are separated by an equal sign
+ (?:
+ [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
+ |{[^}]*} # or any kind of char surrounded by braces
+ )+""",
+ re.VERBOSE,
+)
+
+# https://imagemagick.org/script/miff.php
+# turn off black formatting until python 3.10 is available on more platforms
+# and we can use match/case
+# fmt: off
+def parse_miff(data):
+ results = []
+ header, rest = data.split(b":\x1a", 1)
+ header = header.decode("ISO-8859-1")
+ assert header.lower().startswith("id=imagemagick")
+ hdata = {}
+ for i, line in enumerate(re.findall(miff_re, header)):
+ if not line:
+ continue
+ k, v = line.split("=", 1)
+ if i == 0:
+ assert k.lower() == "id"
+ assert v.lower() == "imagemagick"
+ #match k.lower():
+ # case "class":
+ if k.lower() == "class":
+ #match v:
+ # case "DirectClass" | "PseudoClass":
+ if v in ["DirectClass", "PseudoClass"]:
+ hdata["class"] = v
+ # case _:
+ else:
+ print("cannot understand class", v)
+ # case "colorspace":
+ elif k.lower() == "colorspace":
+ # theoretically RGBA and CMYKA should be supported as well
+ # please teach me how to create such a MIFF file
+ #match v:
+ # case "sRGB" | "CMYK" | "Gray":
+ if v in ["sRGB", "CMYK", "Gray"]:
+ hdata["colorspace"] = v
+ # case _:
+ else:
+ print("cannot understand colorspace", v)
+ # case "depth":
+ elif k.lower() == "depth":
+ #match v:
+ # case "8" | "16" | "32":
+ if v in ["8", "16", "32"]:
+ hdata["depth"] = int(v)
+ # case _:
+ else:
+ print("cannot understand depth", v)
+ # case "colors":
+ elif k.lower() == "colors":
+ hdata["colors"] = int(v)
+ # case "matte":
+ elif k.lower() == "matte":
+ #match v:
+ # case "True":
+ if v == "True":
+ hdata["matte"] = True
+ # case "False":
+ elif v == "False":
+ hdata["matte"] = False
+ # case _:
+ else:
+ print("cannot understand matte", v)
+ # case "columns" | "rows":
+ elif k.lower() in ["columns", "rows"]:
+ hdata[k.lower()] = int(v)
+ # case "compression":
+ elif k.lower() == "compression":
+ print("compression not yet supported")
+ # case "profile":
+ elif k.lower() == "profile":
+ assert v in ["icc", "exif"]
+ hdata["profile"] = v
+ # case "resolution":
+ elif k.lower() == "resolution":
+ dpix, dpiy = v.split("x", 1)
+ hdata["resolution"] = (float(dpix), float(dpiy))
+
+ assert "depth" in hdata
+ assert "columns" in hdata
+ assert "rows" in hdata
+ #match hdata["class"]:
+ # case "DirectClass":
+ if hdata["class"] == "DirectClass":
+ if "colors" in hdata:
+ assert hdata["colors"] == 0
+ #match hdata["colorspace"]:
+ # case "sRGB":
+ if hdata["colorspace"] == "sRGB":
+ numchannels = 3
+ colorspace = Colorspace.RGB
+ # case "CMYK":
+ elif hdata["colorspace"] == "CMYK":
+ numchannels = 4
+ colorspace = Colorspace.CMYK
+ # case "Gray":
+ elif hdata["colorspace"] == "Gray":
+ numchannels = 1
+ colorspace = Colorspace.L
+ if hdata.get("matte"):
+ numchannels += 1
+ if hdata.get("profile"):
+ # there is no key encoding the length of icc or exif data
+ # according to the docs, the profile-icc key is supposed to do this
+ print("FAIL: exif")
+ else:
+ lenimgdata = (
+ hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
+ )
+ assert len(rest) >= lenimgdata, (
+ len(rest),
+ hdata["depth"],
+ numchannels,
+ hdata["columns"],
+ hdata["rows"],
+ lenimgdata,
+ )
+ if colorspace == Colorspace.RGB and hdata["depth"] == 8:
+ newimg = Image.frombytes("RGB", (hdata["columns"], hdata["rows"]), rest[:lenimgdata])
+ imgdata, palette, depth = to_png_data(newimg)
+ assert palette == b""
+ assert depth == hdata["depth"]
+ imgfmt = ImageFormat.PNG
+ else:
+ imgdata = zlib.compress(rest[:lenimgdata])
+ imgfmt = ImageFormat.MIFF
+ results.append(
+ (
+ colorspace,
+ hdata.get("resolution") or (default_dpi, default_dpi),
+ imgfmt,
+ imgdata,
+ None, # smask
+ hdata["columns"],
+ hdata["rows"],
+ [], # palette
+ False, # inverted
+ hdata["depth"],
+ 0, # rotation
+ None, # icc profile
+ )
+ )
+ if len(rest) > lenimgdata:
+ # another image is here
+ assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
+ results.extend(parse_miff(rest[lenimgdata:]))
+ # case "PseudoClass":
+ elif hdata["class"] == "PseudoClass":
+ assert "colors" in hdata
+ if hdata.get("matte"):
+ numchannels = 2
+ else:
+ numchannels = 1
+ lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
+ lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
+ assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
+ results.append(
+ (
+ Colorspace.RGB,
+ hdata.get("resolution") or (default_dpi, default_dpi),
+ ImageFormat.MIFF,
+ zlib.compress(rest[lenpal : lenpal + lenimgdata]),
+ None, # FIXME: allow alpha channel smask
+ hdata["columns"],
+ hdata["rows"],
+ rest[:lenpal], # palette
+ False, # inverted
+ hdata["depth"],
+ 0, # rotation
+ None, # icc profile
+ )
+ )
+ if len(rest) > lenpal + lenimgdata:
+ # another image is here
+ assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
+ len(rest),
+ lenpal,
+ lenimgdata,
+ )
+ results.extend(parse_miff(rest[lenpal + lenimgdata :]))
+ return results
+# fmt: on
+
+
+def read_images(
+ rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
+):
im = BytesIO(rawdata)
im.seek(0)
imgdata = None
@@ -1541,13 +1817,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgdata = Image.open(im)
except IOError as e:
# test if it is a jpeg2000 image
- if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ # image is jpeg2000
+ imgformat = ImageFormat.JPEG2000
+ if rawdata[:14].lower() == b"id=imagemagick":
+ # image is in MIFF format
+ # this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
+ # we need PIL but PIL cannot do 16 bit
+ imgformat = ImageFormat.MIFF
+ else:
raise ImageOpenError(
"cannot read input image (not jpeg2000). "
"PIL: error reading image: %s" % e
)
- # image is jpeg2000
- imgformat = ImageFormat.JPEG2000
else:
logger.debug("PIL format = %s", imgdata.format)
imgformat = None
@@ -1581,10 +1863,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
raise JpegColorspaceError("jpeg can't be monochrome")
if color == Colorspace["P"]:
raise JpegColorspaceError("jpeg can't have a color palette")
- if color == Colorspace["RGBA"]:
+ if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
raise JpegColorspaceError("jpeg can't have an alpha channel")
logger.debug("read_images() embeds a JPEG")
cleanup()
+ depth = 8
+ if imgformat == ImageFormat.JPEG2000:
+ *_, depth = jp2.parse(rawdata)
return [
(
color,
@@ -1596,7 +1881,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgheightpx,
[],
False,
- 8,
+ depth,
rotation,
iccp,
)
@@ -1613,6 +1898,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
if imgformat == ImageFormat.MPO:
result = []
img_page_count = 0
+ assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002])
+ num_frames = len(imgdata.mpinfo[0xB002])
+ # An MPO file can be a main image together with one or more thumbnails.
+ # If that is the case, then we only include all frames if the
+ # --include-thumbnails option is given. If it is not, such an MPO file
+ # will be embedded as is, so including its thumbnails but showing up
+ # as a single image page in the resulting PDF.
+ num_main_frames = 0
+ num_thumbnail_frames = 0
+ for i, mpent in enumerate(imgdata.mpinfo[0xB002]):
+ # check only the first frame for being the main image
+ if (
+ i == 0
+ and mpent["Attribute"]["DependentParentImageFlag"]
+ and not mpent["Attribute"]["DependentChildImageFlag"]
+ and mpent["Attribute"]["RepresentativeImageFlag"]
+ and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image"
+ ):
+ num_main_frames += 1
+ elif (
+ not mpent["Attribute"]["DependentParentImageFlag"]
+ and mpent["Attribute"]["DependentChildImageFlag"]
+ and not mpent["Attribute"]["RepresentativeImageFlag"]
+ and mpent["Attribute"]["MPType"]
+ in [
+ "Large Thumbnail (VGA Equivalent)",
+ "Large Thumbnail (Full HD Equivalent)",
+ ]
+ ):
+ num_thumbnail_frames += 1
+ logger.debug(f"number of frames: {num_frames}")
+ logger.debug(f"number of main frames: {num_main_frames}")
+ logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}")
+ # this MPO file is a main image plus zero or more thumbnails
+ # embed as-is unless the --include-thumbnails option was given
+ if num_frames == 1 or (
+ not include_thumbnails
+ and num_main_frames == 1
+ and num_thumbnail_frames + 1 == num_frames
+ ):
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata, rot
+ )
+ if color == Colorspace["1"]:
+ raise JpegColorspaceError("jpeg can't be monochrome")
+ if color == Colorspace["P"]:
+ raise JpegColorspaceError("jpeg can't have a color palette")
+ if color == Colorspace["RGBA"]:
+ raise JpegColorspaceError("jpeg can't have an alpha channel")
+ logger.debug("read_images() embeds an MPO verbatim")
+ cleanup()
+ return [
+ (
+ color,
+ ndpi,
+ ImageFormat.JPEG,
+ rawdata,
+ None,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ iccp,
+ )
+ ]
+ # If the control flow reaches here, the MPO has more than a single
+ # frame but was not detected to be a main image followed by multiple
+ # thumbnails. We thus treat this MPO as we do other multi-frame images
+ # and include all its frames as individual pages.
for offset, mpent in zip(
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
):
@@ -1710,6 +2066,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
]
+ if imgformat == ImageFormat.MIFF:
+ return parse_miff(rawdata)
+
# If our input is not JPEG or PNG, then we might have a format that
# supports multiple frames (like TIFF or GIF), so we need a loop to
# iterate through all frames of the image.
@@ -1875,7 +2234,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
)
else:
- if (
+ if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
+ # PDF does not support palette images with icc profile
+ if color == Colorspace.P:
+ newcolor = Colorspace.RGB
+ newimg = newimg.convert(mode="RGB")
+ elif color == Colorspace.PA:
+ newcolor = Colorspace.RGBA
+ newimg = newimg.convert(mode="RGBA")
+ smaskidat = None
+ elif (
color == Colorspace.RGBA
or color == Colorspace.LA
or color == Colorspace.PA
@@ -1889,25 +2257,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
newcolor = color
l, a = newimg.split()
newimg = l
+ elif color == Colorspace.PA or (
+ color == Colorspace.P and "transparency" in newimg.info
+ ):
+ newcolor = color
+ a = newimg.convert(mode="RGBA").split()[-1]
else:
newcolor = Colorspace.RGBA
r, g, b, a = newimg.convert(mode="RGBA").split()
newimg = Image.merge("RGB", (r, g, b))
- smaskidat, _, _ = to_png_data(a)
+ smaskidat, *_ = to_png_data(a)
logger.warning(
"Image contains an alpha channel. Computing a separate "
"soft mask (/SMask) image to store transparency in PDF."
)
- elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
- # PDF does not support palette images with icc profile
- if color == Colorspace.P:
- newcolor = Colorspace.RGB
- newimg = newimg.convert(mode="RGB")
- elif color == Colorspace.PA:
- newcolor = Colorspace.RGBA
- newimg = newimg.convert(mode="RGBA")
- smaskidat = None
else:
newcolor = color
smaskidat = None
@@ -2249,7 +2613,6 @@ def find_scale(pagewidth, pageheight):
# as a binary string representing the image content or as filenames to the
# images.
def convert(*images, **kwargs):
-
_default_kwargs = dict(
engine=None,
title=None,
@@ -2279,6 +2642,7 @@ def convert(*images, **kwargs):
artborder=None,
pdfa=None,
rotation=None,
+ include_thumbnails=False,
)
for kwname, default in _default_kwargs.items():
if kwname not in kwargs:
@@ -2322,11 +2686,16 @@ def convert(*images, **kwargs):
for img in images:
# img is allowed to be a path, a binary string representing image data
# or a file-like object (really anything that implements read())
- try:
- rawdata = img.read()
- except AttributeError:
+ # or a pathlib.Path object (really anything that implements read_bytes())
+ rawdata = None
+ for fun in "read", "read_bytes":
+ try:
+ rawdata = getattr(img, fun)()
+ except AttributeError:
+ pass
+ if rawdata is None:
if not isinstance(img, (str, bytes)):
- raise TypeError("Neither implements read() nor is str or bytes")
+ raise TypeError("Neither read(), read_bytes() nor is str or bytes")
# the thing doesn't have a read() function, so try if we can treat
# it as a file name
try:
@@ -2344,6 +2713,10 @@ def convert(*images, **kwargs):
rawdata = f.read()
f.close()
+ # md5 = hashlib.md5(rawdata).hexdigest()
+ # with open("./testdata/" + md5, "wb") as f:
+ # f.write(rawdata)
+
for (
color,
ndpi,
@@ -2362,6 +2735,7 @@ def convert(*images, **kwargs):
kwargs["colorspace"],
kwargs["first_frame_only"],
kwargs["rotation"],
+ kwargs["include_thumbnails"],
):
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
imgwidthpx, imgheightpx, ndpi
@@ -2737,7 +3111,7 @@ def valid_date(string):
else:
try:
return parser.parse(string)
- except TypeError:
+ except:
pass
# as a last resort, try the local date utility
try:
@@ -2750,7 +3124,7 @@ def valid_date(string):
except subprocess.CalledProcessError:
pass
else:
- return datetime.utcfromtimestamp(int(utime))
+ return datetime.fromtimestamp(int(utime))
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
@@ -3452,7 +3826,35 @@ def gui():
app.mainloop()
-def main(argv=sys.argv):
+def file_is_icc(fname):
+ with open(fname, "rb") as f:
+ data = f.read(40)
+ if len(data) < 40:
+ return False
+ return data[36:] == b"acsp"
+
+
+def validate_icc(fname):
+ if not file_is_icc(fname):
+ raise argparse.ArgumentTypeError('"%s" is not an ICC profile' % fname)
+ return fname
+
+
+def get_default_icc_profile():
+ for profile in [
+ "/usr/share/color/icc/sRGB.icc",
+ "/usr/share/color/icc/OpenICC/sRGB.icc",
+ "/usr/share/color/icc/colord/sRGB.icc",
+ ]:
+ if not os.path.exists(profile):
+ continue
+ if not file_is_icc(profile):
+ continue
+ return profile
+ return "/usr/share/color/icc/sRGB.icc"
+
+
+def get_main_parser():
rendered_papersizes = ""
for k, v in sorted(papersizes.items()):
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
@@ -3493,7 +3895,9 @@ Paper sizes:
the value in the second column has the same effect as giving the short hand
in the first column. Appending ^T (a caret/circumflex followed by the letter
T) turns the paper size from portrait into landscape. The postfix thus
- symbolizes the transpose. The values are case insensitive.
+ symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
+ the escape character, so you need to put quotes around the option value.
+ The values are case insensitive.
%s
@@ -3560,7 +3964,7 @@ Examples:
while preserving its aspect ratio and a print border of 2 cm on the top and
bottom and 2.5 cm on the left and right hand side.
- $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
+ $ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
original image size if the image is smaller than that.
@@ -3693,6 +4097,17 @@ RGB.""",
)
outargs.add_argument(
+ "--include-thumbnails",
+ action="store_true",
+ help="Some multi-frame formats like MPO carry a main image and "
+ "one or more scaled-down copies of the main image (thumbnails). "
+ "In such a case, img2pdf will only include the main image and "
+ "not create additional pages for each of the thumbnails. If this "
+ "option is set, img2pdf will instead create one page per frame and "
+ "thus store each thumbnail on its own page.",
+ )
+
+ outargs.add_argument(
"--pillow-limit-break",
action="store_true",
help="img2pdf uses the Python Imaging Library Pillow to read input "
@@ -3703,14 +4118,29 @@ RGB.""",
% Image.MAX_IMAGE_PIXELS,
)
- outargs.add_argument(
- "--pdfa",
- nargs="?",
- const="/usr/share/color/icc/sRGB.icc",
- default=None,
- help="Output a PDF/A-1b compliant document. By default, this will "
- "embed /usr/share/color/icc/sRGB.icc as the color profile.",
- )
+ if sys.platform == "win32":
+ # on Windows, there are no default paths to search for an ICC profile
+ # so make the argument required instead of optional
+ outargs.add_argument(
+ "--pdfa",
+ type=validate_icc,
+ help="Output a PDF/A-1b compliant document. The argument to this "
+ "option is the path to the ICC profile that will be embedded into "
+ "the resulting PDF.",
+ )
+ else:
+ outargs.add_argument(
+ "--pdfa",
+ nargs="?",
+ const=get_default_icc_profile(),
+ default=None,
+ type=validate_icc,
+ help="Output a PDF/A-1b compliant document. By default, this will "
+ "embed either /usr/share/color/icc/sRGB.icc, "
+ "/usr/share/color/icc/OpenICC/sRGB.icc or "
+ "/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
+ "is found to exist first.",
+ )
sizeargs = parser.add_argument_group(
title="Image and page size and layout arguments",
@@ -3999,8 +4429,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
action="store_true",
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
)
+ return parser
- args = parser.parse_args(argv[1:])
+
+def main(argv=sys.argv):
+ args = get_main_parser().parse_args(argv[1:])
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
@@ -4020,7 +4453,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
elif len(args.images) == 0 and len(args.from_file) == 0:
# if no positional arguments were supplied, read a single image from
# standard input
- logger.info("reading image from standard input")
+ print(
+ "Reading image from standard input...\n"
+ "Re-run with -h or --help for usage information.",
+ file=sys.stderr,
+ )
try:
images = [sys.stdin.buffer.read()]
except KeyboardInterrupt:
@@ -4081,6 +4518,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
artborder=args.art_border,
pdfa=args.pdfa,
rotation=args.rotation,
+ include_thumbnails=args.include_thumbnails,
)
except Exception as e:
logger.error("error: " + str(e))
diff --git a/src/img2pdf_test.py b/src/img2pdf_test.py
index 80dd8e0..4882092 100755
--- a/src/img2pdf_test.py
+++ b/src/img2pdf_test.py
@@ -19,6 +19,8 @@ from packaging.version import parse as parse_version
import warnings
import json
import pathlib
+import itertools
+import xml.etree.ElementTree as ET
img2pdfprog = os.getenv("img2pdfprog", default="src/img2pdf.py")
@@ -37,6 +39,14 @@ for glob in ICC_PROFILE_PATHS:
ICC_PROFILE = path
break
+HAVE_FAKETIME = True
+try:
+ ver = subprocess.check_output(["faketime", "--version"])
+ if b"faketime: Version " not in ver:
+ HAVE_FAKETIME = False
+except FileNotFoundError:
+ HAVE_FAKETIME = False
+
HAVE_MUTOOL = True
try:
ver = subprocess.check_output(["mutool", "-v"], stderr=subprocess.STDOUT)
@@ -113,11 +123,36 @@ except subprocess.CalledProcessError:
if not HAVE_JP2:
warnings.warn("imagemagick has no jpeg 2000 support, skipping certain checks...")
+# the result of compare -metric PSNR is either just a floating point value or a
+# floating point value followed by the same value multiplied by 0.01,
+# surrounded by parentheses, since ImageMagick 7.1.0-48:
+# https://github.com/ImageMagick/ImageMagick/commit/751829cd4c911d7a42953a47c1f73068d9e7da2f
+psnr_re = re.compile(rb"((?:inf|(?:0|[1-9][0-9]*)(?:\.[0-9]+)?))(?: \([0-9.]+\))?")
+
###############################################################################
# HELPER FUNCTIONS #
###############################################################################
+# Interpret a datetime string in a given timezone and format it according to a
+# given format string in UTC.
+# We avoid using the Python datetime module for this job because doing so would
+# just replicate the code we want to test for correctness.
+def tz2utcstrftime(string, fmt, timezone):
+ return (
+ subprocess.check_output(
+ [
+ "date",
+ "--utc",
+ f'--date=TZ="{timezone}" {string}',
+ f"+{fmt}",
+ ]
+ )
+ .decode("utf8")
+ .removesuffix("\n")
+ )
+
+
def find_closest_palette_color(color, palette):
if color.ndim == 0:
idx = (numpy.abs(palette - color)).argmin()
@@ -291,7 +326,7 @@ def write_png(data, path, bitdepth, colortype, palette=None, iccp=None):
for j in range(valsperbyte):
if x + j >= data.shape[1]:
break
- val |= (data[y, x + j].astype(">u2") & (2 ** bitdepth - 1)) << (
+ val |= (data[y, x + j].astype(">u2") & (2**bitdepth - 1)) << (
(valsperbyte - j - 1) * bitdepth
)
raw += struct.pack(">B", val)
@@ -310,9 +345,7 @@ def write_png(data, path, bitdepth, colortype, palette=None, iccp=None):
def compare(im1, im2, exact, icc, cmyk):
if exact:
- if cmyk:
- raise Exception("cmyk cannot be exact")
- elif icc:
+ if icc:
raise Exception("icc cannot be exact")
else:
subprocess.check_call(
@@ -320,6 +353,8 @@ def compare(im1, im2, exact, icc, cmyk):
+ [
"-metric",
"AE",
+ "-alpha",
+ "off",
im1,
im2,
"null:",
@@ -345,7 +380,10 @@ def compare(im1, im2, exact, icc, cmyk):
stderr=subprocess.PIPE,
).stderr
assert psnr != b"0"
- psnr = float(psnr.strip(b"0"))
+ assert psnr != b"0 (0)"
+ assert psnr_re.fullmatch(psnr) is not None, psnr
+ psnr = psnr_re.fullmatch(psnr).group(1)
+ psnr = float(psnr)
assert psnr != 0 # or otherwise we would use the exact variant
assert psnr > 50
@@ -501,7 +539,9 @@ def compare_pdfimages_png(tmpdir, img, pdf, exact=True, icc=False):
stderr=subprocess.PIPE,
).stderr
assert psnr != b"0"
- psnr = float(psnr.strip(b"0"))
+ assert psnr != b"0 (0)"
+ psnr = psnr_re.fullmatch(psnr).group(1)
+ psnr = float(psnr)
assert psnr != 0 # or otherwise we would use the exact variant
assert psnr > 50
(tmpdir / "images-000.png").unlink()
@@ -586,7 +626,7 @@ def alpha_value():
alpha = numpy.zeros((60, 60, 4), dtype=numpy.dtype("int64"))
# draw three circles
- for (xpos, ypos, color) in [
+ for xpos, ypos, color in [
(12, 3, [0xFFFF, 0, 0, 0xFFFF]),
(21, 21, [0, 0xFFFF, 0, 0xFFFF]),
(3, 21, [0, 0, 0xFFFF, 0xFFFF]),
@@ -1171,6 +1211,74 @@ def jpg_2000_img(tmp_path_factory, tmp_normal_png):
@pytest.fixture(scope="session")
+def jpg_2000_rgba8_img(tmp_path_factory, tmp_alpha_png):
+ in_img = tmp_path_factory.mktemp("jpg_2000_rgba8") / "in.jp2"
+ subprocess.check_call(CONVERT + [str(tmp_alpha_png), "-depth", "8", str(in_img)])
+ identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
+ assert len(identify) == 1
+ # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
+ # put into an array, here we cater for the older version containing just
+ # the bare dictionary
+ if "image" in identify:
+ identify = [identify]
+ assert "image" in identify[0]
+ assert identify[0]["image"].get("format") == "JP2", str(identify)
+ assert identify[0]["image"].get("mimeType") == "image/jp2", str(identify)
+ assert identify[0]["image"].get("geometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
+ assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify)
+ assert identify[0]["image"].get("depth") == 8, str(identify)
+ assert identify[0]["image"].get("pageGeometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("compression") == "JPEG2000", str(identify)
+ yield in_img
+ in_img.unlink()
+
+
+@pytest.fixture(scope="session")
+def jpg_2000_rgba16_img(tmp_path_factory, tmp_alpha_png):
+ in_img = tmp_path_factory.mktemp("jpg_2000_rgba16") / "in.jp2"
+ subprocess.check_call(CONVERT + [str(tmp_alpha_png), str(in_img)])
+ identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
+ assert len(identify) == 1
+ # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
+ # put into an array, here we cater for the older version containing just
+ # the bare dictionary
+ if "image" in identify:
+ identify = [identify]
+ assert "image" in identify[0]
+ assert identify[0]["image"].get("format") == "JP2", str(identify)
+ assert identify[0]["image"].get("mimeType") == "image/jp2", str(identify)
+ assert identify[0]["image"].get("geometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
+ assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify)
+ assert identify[0]["image"].get("depth") == 16, str(identify)
+ assert identify[0]["image"].get("pageGeometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("compression") == "JPEG2000", str(identify)
+ yield in_img
+ in_img.unlink()
+
+
+@pytest.fixture(scope="session")
def png_rgb8_img(tmp_normal_png):
in_img = tmp_normal_png
identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
@@ -1582,7 +1690,7 @@ def png_gray1_img(tmp_path_factory, tmp_gray1_png):
"y": 0,
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
- assert identify[0]["image"].get("type") == "Bilevel", str(identify)
+ assert identify[0]["image"].get("type") in ["Bilevel", "Grayscale"], str(identify)
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2313,10 +2421,6 @@ def tiff_float_img(tmp_path_factory, tmp_normal_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("baseDepth") == 32, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
@@ -2332,9 +2436,6 @@ def tiff_float_img(tmp_path_factory, tmp_normal_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2374,10 +2475,6 @@ def tiff_cmyk8_img(tmp_path_factory, tmp_normal_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "CMYK", str(identify)
assert identify[0]["image"].get("type") == "ColorSeparation", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2388,9 +2485,6 @@ def tiff_cmyk8_img(tmp_path_factory, tmp_normal_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "separated"
@@ -2433,10 +2527,6 @@ def tiff_cmyk16_img(tmp_path_factory, tmp_normal_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "CMYK", str(identify)
assert identify[0]["image"].get("type") == "ColorSeparation", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2447,9 +2537,6 @@ def tiff_cmyk16_img(tmp_path_factory, tmp_normal_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "separated"
@@ -2482,10 +2569,6 @@ def tiff_rgb8_img(tmp_path_factory, tmp_normal_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2496,9 +2579,6 @@ def tiff_rgb8_img(tmp_path_factory, tmp_normal_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2538,14 +2618,6 @@ def tiff_rgb12_img(tmp_path_factory, tmp_normal16_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
- if identify[0].get("version", "0") < "1.0":
- assert identify[0]["image"].get("depth") == 12, str(identify)
- else:
- assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("baseDepth") == 12, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2556,9 +2628,6 @@ def tiff_rgb12_img(tmp_path_factory, tmp_normal16_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2598,11 +2667,6 @@ def tiff_rgb14_img(tmp_path_factory, tmp_normal16_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
- assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("baseDepth") == 14, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2613,9 +2677,6 @@ def tiff_rgb14_img(tmp_path_factory, tmp_normal16_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2655,10 +2716,6 @@ def tiff_rgb16_img(tmp_path_factory, tmp_normal16_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2669,9 +2726,6 @@ def tiff_rgb16_img(tmp_path_factory, tmp_normal16_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2712,10 +2766,6 @@ def tiff_rgba8_img(tmp_path_factory, tmp_alpha_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2726,9 +2776,6 @@ def tiff_rgba8_img(tmp_path_factory, tmp_alpha_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unassociated"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2769,10 +2816,6 @@ def tiff_rgba16_img(tmp_path_factory, tmp_alpha_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColorAlpha", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2783,9 +2826,6 @@ def tiff_rgba16_img(tmp_path_factory, tmp_alpha_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unassociated"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -2825,10 +2865,6 @@ def tiff_gray1_img(tmp_path_factory, tmp_gray1_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Bilevel", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2839,9 +2875,6 @@ def tiff_gray1_img(tmp_path_factory, tmp_gray1_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "min-is-black"
@@ -2882,10 +2915,6 @@ def tiff_gray2_img(tmp_path_factory, tmp_gray2_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Grayscale", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 2, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2896,9 +2925,6 @@ def tiff_gray2_img(tmp_path_factory, tmp_gray2_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "min-is-black"
@@ -2939,10 +2965,6 @@ def tiff_gray4_img(tmp_path_factory, tmp_gray4_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Grayscale", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 4, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -2953,9 +2975,6 @@ def tiff_gray4_img(tmp_path_factory, tmp_gray4_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "min-is-black"
@@ -2996,10 +3015,6 @@ def tiff_gray8_img(tmp_path_factory, tmp_gray8_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Grayscale", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3010,9 +3025,6 @@ def tiff_gray8_img(tmp_path_factory, tmp_gray8_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "min-is-black"
@@ -3053,10 +3065,6 @@ def tiff_gray16_img(tmp_path_factory, tmp_gray16_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Grayscale", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 16, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3067,9 +3075,6 @@ def tiff_gray16_img(tmp_path_factory, tmp_gray16_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric")
== "min-is-black"
@@ -3112,10 +3117,6 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3126,9 +3127,6 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -3152,10 +3150,6 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "TrueColor", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3166,9 +3160,6 @@ def tiff_multipage_img(tmp_path_factory, tmp_normal_png, tmp_inverse_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "RGB"
), str(identify)
@@ -3201,10 +3192,6 @@ def tiff_palette1_img(tmp_path_factory, tmp_palette1_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "Palette", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("baseDepth") == 1, str(identify)
assert identify[0]["image"].get("colormapEntries") == 2, str(identify)
@@ -3217,9 +3204,6 @@ def tiff_palette1_img(tmp_path_factory, tmp_palette1_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
), str(identify)
@@ -3251,10 +3235,6 @@ def tiff_palette2_img(tmp_path_factory, tmp_palette2_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "Palette", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("baseDepth") == 2, str(identify)
assert identify[0]["image"].get("colormapEntries") == 4, str(identify)
@@ -3267,9 +3247,6 @@ def tiff_palette2_img(tmp_path_factory, tmp_palette2_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
), str(identify)
@@ -3301,10 +3278,6 @@ def tiff_palette4_img(tmp_path_factory, tmp_palette4_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "Palette", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("baseDepth") == 4, str(identify)
assert identify[0]["image"].get("colormapEntries") == 16, str(identify)
@@ -3317,9 +3290,6 @@ def tiff_palette4_img(tmp_path_factory, tmp_palette4_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
), str(identify)
@@ -3351,10 +3321,6 @@ def tiff_palette8_img(tmp_path_factory, tmp_palette8_png):
}, str(identify)
assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
assert identify[0]["image"].get("type") == "Palette", str(identify)
- endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
assert identify[0]["image"].get("depth") == 8, str(identify)
assert identify[0]["image"].get("colormapEntries") == 256, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
@@ -3366,9 +3332,6 @@ def tiff_palette8_img(tmp_path_factory, tmp_palette8_png):
assert (
identify[0]["image"].get("properties", {}).get("tiff:alpha") == "unspecified"
), str(identify)
- assert identify[0]["image"].get("properties", {}).get("tiff:endian") == "lsb", str(
- identify
- )
assert (
identify[0]["image"].get("properties", {}).get("tiff:photometric") == "palette"
), str(identify)
@@ -3415,9 +3378,10 @@ def tiff_ccitt_lsb_m2l_white_img(tmp_path_factory, tmp_gray1_png):
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Bilevel", str(identify)
endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
+ assert identify[0]["image"].get(endian) in [
+ "Undefined",
+ "LSB",
+ ], str(identify)
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3665,9 +3629,10 @@ def tiff_ccitt_lsb_m2l_black_img(tmp_path_factory, tmp_gray1_png):
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Bilevel", str(identify)
endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
+ assert identify[0]["image"].get(endian) in [
+ "Undefined",
+ "LSB",
+ ], str(identify)
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3755,9 +3720,10 @@ def tiff_ccitt_nometa1_img(tmp_path_factory, tmp_gray1_png):
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("type") == "Bilevel", str(identify)
endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
+ assert identify[0]["image"].get(endian) in [
+ "Undefined",
+ "LSB",
+ ], str(identify)
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("pageGeometry") == {
"width": 60,
@@ -3839,9 +3805,10 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
assert identify[0]["image"].get("units") == "PixelsPerInch", str(identify)
assert identify[0]["image"].get("type") == "Bilevel", str(identify)
endian = "endianess" if identify[0].get("version", "0") < "1.0" else "endianness"
- assert identify[0]["image"].get(endian) in ["Undefined", "LSB",], str(
- identify
- ) # FIXME: should be LSB
+ assert identify[0]["image"].get(endian) in [
+ "Undefined",
+ "LSB",
+ ], str(identify)
assert identify[0]["image"].get("colorspace") == "Gray", str(identify)
assert identify[0]["image"].get("depth") == 1, str(identify)
assert identify[0]["image"].get("compression") == "Group4", str(identify)
@@ -3877,6 +3844,127 @@ def tiff_ccitt_nometa2_img(tmp_path_factory, tmp_gray1_png):
@pytest.fixture(scope="session")
+def miff_cmyk8_img(tmp_path_factory, tmp_normal_png):
+ in_img = tmp_path_factory.mktemp("miff_cmyk8") / "in.miff"
+ subprocess.check_call(
+ CONVERT
+ + [
+ str(tmp_normal_png),
+ "-colorspace",
+ "cmyk",
+ str(in_img),
+ ]
+ )
+ identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
+ assert len(identify) == 1
+ # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
+ # put into an array, here we cater for the older version containing just
+ # the bare dictionary
+ if "image" in identify:
+ identify = [identify]
+ assert "image" in identify[0]
+ assert identify[0]["image"].get("format") == "MIFF", str(identify)
+ assert identify[0]["image"].get("class") == "DirectClass"
+ assert identify[0]["image"].get("type") == "ColorSeparation"
+ assert identify[0]["image"].get("geometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("colorspace") == "CMYK", str(identify)
+ assert identify[0]["image"].get("type") == "ColorSeparation", str(identify)
+ assert identify[0]["image"].get("depth") == 8, str(identify)
+ assert identify[0]["image"].get("pageGeometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ yield in_img
+ in_img.unlink()
+
+
+@pytest.fixture(scope="session")
+def miff_cmyk16_img(tmp_path_factory, tmp_normal_png):
+ in_img = tmp_path_factory.mktemp("miff_cmyk16") / "in.miff"
+ subprocess.check_call(
+ CONVERT
+ + [
+ str(tmp_normal_png),
+ "-depth",
+ "16",
+ "-colorspace",
+ "cmyk",
+ str(in_img),
+ ]
+ )
+ identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
+ assert len(identify) == 1
+ # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
+ # put into an array, here we cater for the older version containing just
+ # the bare dictionary
+ if "image" in identify:
+ identify = [identify]
+ assert "image" in identify[0]
+ assert identify[0]["image"].get("format") == "MIFF", str(identify)
+ assert identify[0]["image"].get("class") == "DirectClass"
+ assert identify[0]["image"].get("type") == "ColorSeparation"
+ assert identify[0]["image"].get("geometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("colorspace") == "CMYK", str(identify)
+ assert identify[0]["image"].get("type") == "ColorSeparation", str(identify)
+ assert identify[0]["image"].get("depth") == 16, str(identify)
+ assert identify[0]["image"].get("baseDepth") == 16, str(identify)
+ assert identify[0]["image"].get("pageGeometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ yield in_img
+ in_img.unlink()
+
+
+@pytest.fixture(scope="session")
+def miff_rgb8_img(tmp_path_factory, tmp_normal_png):
+ in_img = tmp_path_factory.mktemp("miff_rgb8") / "in.miff"
+ subprocess.check_call(CONVERT + [str(tmp_normal_png), str(in_img)])
+ identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
+ assert len(identify) == 1
+ # somewhere between imagemagick 6.9.7.4 and 6.9.9.34, the json output was
+ # put into an array, here we cater for the older version containing just
+ # the bare dictionary
+ if "image" in identify:
+ identify = [identify]
+ assert "image" in identify[0]
+ assert identify[0]["image"].get("format") == "MIFF", str(identify)
+ assert identify[0]["image"].get("class") == "DirectClass"
+ assert identify[0]["image"].get("type") == "TrueColor"
+ assert identify[0]["image"].get("geometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ assert identify[0]["image"].get("colorspace") == "sRGB", str(identify)
+ assert identify[0]["image"].get("type") == "TrueColor", str(identify)
+ assert identify[0]["image"].get("depth") == 8, str(identify)
+ assert identify[0]["image"].get("pageGeometry") == {
+ "width": 60,
+ "height": 60,
+ "x": 0,
+ "y": 0,
+ }, str(identify)
+ yield in_img
+ in_img.unlink()
+
+
+@pytest.fixture(scope="session")
def png_icc_img(tmp_icc_png):
in_img = tmp_icc_png
identify = json.loads(subprocess.check_output(CONVERT + [str(in_img), "json:"]))
@@ -4043,6 +4131,60 @@ def jpg_2000_pdf(tmp_path_factory, jpg_2000_img, request):
@pytest.fixture(scope="session", params=["internal", "pikepdf"])
+def jpg_2000_rgba8_pdf(tmp_path_factory, jpg_2000_rgba8_img, request):
+ out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba8_pdf") / "out.pdf"
+ subprocess.check_call(
+ [
+ img2pdfprog,
+ "--producer=",
+ "--nodate",
+ "--engine=" + request.param,
+ "--output=" + str(out_pdf),
+ jpg_2000_rgba8_img,
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert (
+ p.pages[0].Contents.read_bytes()
+ == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+ )
+ assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
+ assert not hasattr(p.pages[0].Resources.XObject.Im0, "ColorSpace")
+ assert p.pages[0].Resources.XObject.Im0.Filter == "/JPXDecode"
+ assert p.pages[0].Resources.XObject.Im0.Height == 60
+ assert p.pages[0].Resources.XObject.Im0.Width == 60
+ yield out_pdf
+ out_pdf.unlink()
+
+
+@pytest.fixture(scope="session", params=["internal", "pikepdf"])
+def jpg_2000_rgba16_pdf(tmp_path_factory, jpg_2000_rgba16_img, request):
+ out_pdf = tmp_path_factory.mktemp("jpg_2000_rgba16_pdf") / "out.pdf"
+ subprocess.check_call(
+ [
+ img2pdfprog,
+ "--producer=",
+ "--nodate",
+ "--engine=" + request.param,
+ "--output=" + str(out_pdf),
+ jpg_2000_rgba16_img,
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert (
+ p.pages[0].Contents.read_bytes()
+ == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+ )
+ assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 16
+ assert not hasattr(p.pages[0].Resources.XObject.Im0, "ColorSpace")
+ assert p.pages[0].Resources.XObject.Im0.Filter == "/JPXDecode"
+ assert p.pages[0].Resources.XObject.Im0.Height == 60
+ assert p.pages[0].Resources.XObject.Im0.Width == 60
+ yield out_pdf
+ out_pdf.unlink()
+
+
+@pytest.fixture(scope="session", params=["internal", "pikepdf"])
def png_rgb8_pdf(tmp_path_factory, png_rgb8_img, request):
out_pdf = tmp_path_factory.mktemp("png_rgb8_pdf") / "out.pdf"
subprocess.check_call(
@@ -4131,9 +4273,10 @@ def gif_transparent_pdf(tmp_path_factory, gif_transparent_img, request):
== b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
)
assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
- assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceRGB"
+ assert p.pages[0].Resources.XObject.Im0.ColorSpace[0] == "/Indexed"
+ assert p.pages[0].Resources.XObject.Im0.ColorSpace[1] == "/DeviceRGB"
assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8
- assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3
+ assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 1
assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15
assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
assert p.pages[0].Resources.XObject.Im0.Height == 60
@@ -5261,6 +5404,90 @@ def tiff_ccitt_nometa2_pdf(tmp_path_factory, tiff_ccitt_nometa2_img, request):
out_pdf.unlink()
+@pytest.fixture(scope="session", params=["internal", "pikepdf"])
+def miff_cmyk8_pdf(tmp_path_factory, miff_cmyk8_img, request):
+ out_pdf = tmp_path_factory.mktemp("miff_cmyk8_pdf") / "out.pdf"
+ subprocess.check_call(
+ [
+ img2pdfprog,
+ "--producer=",
+ "--nodate",
+ "--engine=" + request.param,
+ "--output=" + str(out_pdf),
+ str(miff_cmyk8_img),
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert (
+ p.pages[0].Contents.read_bytes()
+ == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+ )
+ assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
+ assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceCMYK"
+ assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
+ assert p.pages[0].Resources.XObject.Im0.Height == 60
+ assert p.pages[0].Resources.XObject.Im0.Width == 60
+ yield out_pdf
+ out_pdf.unlink()
+
+
+@pytest.fixture(scope="session", params=["internal", "pikepdf"])
+def miff_cmyk16_pdf(tmp_path_factory, miff_cmyk16_img, request):
+ out_pdf = tmp_path_factory.mktemp("miff_cmyk16_pdf") / "out.pdf"
+ subprocess.check_call(
+ [
+ img2pdfprog,
+ "--producer=",
+ "--nodate",
+ "--engine=" + request.param,
+ "--output=" + str(out_pdf),
+ str(miff_cmyk16_img),
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert (
+ p.pages[0].Contents.read_bytes()
+ == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+ )
+ assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 16
+ assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceCMYK"
+ assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
+ assert p.pages[0].Resources.XObject.Im0.Height == 60
+ assert p.pages[0].Resources.XObject.Im0.Width == 60
+ yield out_pdf
+ out_pdf.unlink()
+
+
+@pytest.fixture(scope="session", params=["internal", "pikepdf"])
+def miff_rgb8_pdf(tmp_path_factory, miff_rgb8_img, request):
+ out_pdf = tmp_path_factory.mktemp("miff_rgb8_pdf") / "out.pdf"
+ subprocess.check_call(
+ [
+ img2pdfprog,
+ "--producer=",
+ "--nodate",
+ "--engine=" + request.param,
+ "--output=" + str(out_pdf),
+ str(miff_rgb8_img),
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert (
+ p.pages[0].Contents.read_bytes()
+ == b"q\n45.0000 0 0 45.0000 0.0000 0.0000 cm\n/Im0 Do\nQ"
+ )
+ assert p.pages[0].Resources.XObject.Im0.BitsPerComponent == 8
+ assert p.pages[0].Resources.XObject.Im0.ColorSpace == "/DeviceRGB"
+ assert p.pages[0].Resources.XObject.Im0.DecodeParms.BitsPerComponent == 8
+ assert p.pages[0].Resources.XObject.Im0.DecodeParms.Colors == 3
+ assert p.pages[0].Resources.XObject.Im0.DecodeParms.Predictor == 15
+ assert p.pages[0].Resources.XObject.Im0.Filter == "/FlateDecode"
+ assert p.pages[0].Resources.XObject.Im0.Height == 60
+ assert p.pages[0].Resources.XObject.Im0.Width == 60
+ yield out_pdf
+ out_pdf.unlink()
+
+
###############################################################################
# TEST CASES #
###############################################################################
@@ -5327,11 +5554,9 @@ def test_jpg_rot(tmp_path_factory, jpg_rot_img, jpg_rot_pdf):
)
def test_jpg_cmyk(tmp_path_factory, jpg_cmyk_img, jpg_cmyk_pdf):
tmpdir = tmp_path_factory.mktemp("jpg_cmyk")
- compare_ghostscript(
- tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, gsdevice="tiff32nc", exact=False
- )
+ compare_ghostscript(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, gsdevice="tiff32nc")
# not testing with poppler as it cannot write CMYK images
- compare_mupdf(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, exact=False, cmyk=True)
+ compare_mupdf(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf, cmyk=True)
compare_pdfimages_cmyk(tmpdir, jpg_cmyk_img, jpg_cmyk_pdf)
@@ -5354,6 +5579,45 @@ def test_jpg_2000(tmp_path_factory, jpg_2000_img, jpg_2000_pdf):
sys.platform in ["win32"],
reason="test utilities not available on Windows and MacOS",
)
+@pytest.mark.skipif(
+ not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
+)
+@pytest.mark.skipif(
+ True, reason="https://github.com/ImageMagick/ImageMagick6/issues/285"
+)
+def test_jpg_2000_rgba8(tmp_path_factory, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf):
+ tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba8")
+ compare_ghostscript(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+ compare_poppler(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+ # compare_mupdf(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+ compare_pdfimages_jp2(tmpdir, jpg_2000_rgba8_img, jpg_2000_rgba8_pdf)
+
+
+@pytest.mark.skipif(
+ sys.platform in ["win32"],
+ reason="test utilities not available on Windows and MacOS",
+)
+@pytest.mark.skipif(
+ not HAVE_JP2, reason="requires imagemagick with support for jpeg2000"
+)
+@pytest.mark.skipif(
+ True, reason="https://github.com/ImageMagick/ImageMagick6/issues/285"
+)
+def test_jpg_2000_rgba16(tmp_path_factory, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf):
+ tmpdir = tmp_path_factory.mktemp("jpg_2000_rgba16")
+ compare_ghostscript(
+ tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf, gsdevice="tiff48nc"
+ )
+ # poppler outputs 8-bit RGB so the comparison will not be exact
+ # compare_poppler(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf, exact=False)
+ # compare_mupdf(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf)
+ compare_pdfimages_jp2(tmpdir, jpg_2000_rgba16_img, jpg_2000_rgba16_pdf)
+
+
+@pytest.mark.skipif(
+ sys.platform in ["win32"],
+ reason="test utilities not available on Windows and MacOS",
+)
def test_png_rgb8(tmp_path_factory, png_rgb8_img, png_rgb8_pdf):
tmpdir = tmp_path_factory.mktemp("png_rgb8")
compare_ghostscript(tmpdir, png_rgb8_img, png_rgb8_pdf)
@@ -5685,10 +5949,13 @@ def test_tiff_float(tmp_path_factory, tiff_float_img, engine):
def test_tiff_cmyk8(tmp_path_factory, tiff_cmyk8_img, tiff_cmyk8_pdf):
tmpdir = tmp_path_factory.mktemp("tiff_cmyk8")
compare_ghostscript(
- tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf, gsdevice="tiff32nc", exact=False
+ tmpdir,
+ tiff_cmyk8_img,
+ tiff_cmyk8_pdf,
+ gsdevice="tiff32nc",
)
# not testing with poppler as it cannot write CMYK images
- compare_mupdf(tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf, exact=False, cmyk=True)
+ compare_mupdf(tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf, cmyk=True)
compare_pdfimages_tiff(tmpdir, tiff_cmyk8_img, tiff_cmyk8_pdf)
@@ -6123,6 +6390,46 @@ def test_tiff_ccitt_nometa2(
compare_pdfimages_tiff(tmpdir, tiff_ccitt_nometa2_img, tiff_ccitt_nometa2_pdf)
+@pytest.mark.skipif(
+ sys.platform in ["win32"],
+ reason="test utilities not available on Windows and MacOS",
+)
+def test_miff_cmyk8(tmp_path_factory, miff_cmyk8_img, tiff_cmyk8_img, miff_cmyk8_pdf):
+ tmpdir = tmp_path_factory.mktemp("miff_cmyk8")
+ compare_ghostscript(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, gsdevice="tiff32nc")
+ # not testing with poppler as it cannot write CMYK images
+ compare_mupdf(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf, cmyk=True)
+ compare_pdfimages_tiff(tmpdir, tiff_cmyk8_img, miff_cmyk8_pdf)
+
+
+@pytest.mark.skipif(
+ sys.platform in ["win32"],
+ reason="test utilities not available on Windows and MacOS",
+)
+def test_miff_cmyk16(
+ tmp_path_factory, miff_cmyk16_img, tiff_cmyk16_img, miff_cmyk16_pdf
+):
+ tmpdir = tmp_path_factory.mktemp("miff_cmyk16")
+ compare_ghostscript(
+ tmpdir, tiff_cmyk16_img, miff_cmyk16_pdf, gsdevice="tiff32nc", exact=False
+ )
+ # not testing with poppler as it cannot write CMYK images
+ compare_mupdf(tmpdir, tiff_cmyk16_img, miff_cmyk16_pdf, exact=False, cmyk=True)
+ # compare_pdfimages_tiff(tmpdir, tiff_cmyk16_img, miff_cmyk16_pdf)
+
+
+@pytest.mark.skipif(
+ sys.platform in ["win32"],
+ reason="test utilities not available on Windows and MacOS",
+)
+def test_miff_rgb8(tmp_path_factory, miff_rgb8_img, tiff_rgb8_img, miff_rgb8_pdf):
+ tmpdir = tmp_path_factory.mktemp("miff_rgb8")
+ compare_ghostscript(tmpdir, tiff_rgb8_img, miff_rgb8_pdf, gsdevice="tiff24nc")
+ compare_poppler(tmpdir, tiff_rgb8_img, miff_rgb8_pdf)
+ compare_mupdf(tmpdir, tiff_rgb8_img, miff_rgb8_pdf)
+ compare_pdfimages_tiff(tmpdir, tiff_rgb8_img, miff_rgb8_pdf)
+
+
# we define some variables so that the table below can be narrower
psl = (972, 504) # --pagesize landscape
psp = (504, 972) # --pagesize portrait
@@ -6554,6 +6861,96 @@ def general_input(request):
return request.param
+@pytest.mark.skipif(not HAVE_FAKETIME, reason="requires faketime")
+@pytest.mark.parametrize(
+ "engine,testdata,timezone,pdfa",
+ itertools.product(
+ ["internal", "pikepdf"],
+ ["2021-02-05 17:49:00"],
+ ["Europe/Berlin", "GMT+12"],
+ [True, False],
+ ),
+)
+def test_faketime(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa):
+ expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone)
+ out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf"
+ subprocess.check_call(
+ ["env", f"TZ={timezone}", "faketime", "-f", testdata, img2pdfprog]
+ + (["--pdfa"] if pdfa else [])
+ + [
+ "--producer=",
+ "--engine=" + engine,
+ "--output=" + str(out_pdf),
+ str(jpg_img),
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert p.docinfo.CreationDate == expected
+ assert p.docinfo.ModDate == expected
+ if pdfa:
+ assert p.Root.Metadata.Subtype == "/XML"
+ assert p.Root.Metadata.Type == "/Metadata"
+ expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone)
+ root = ET.fromstring(p.Root.Metadata.read_bytes())
+ for k in ["ModifyDate", "CreateDate"]:
+ assert (
+ root.find(
+ f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"}
+ ).text
+ == expected
+ )
+ out_pdf.unlink()
+
+
+@pytest.mark.parametrize(
+ "engine,testdata,timezone,pdfa",
+ itertools.product(
+ ["internal", "pikepdf"],
+ [
+ "2021-02-05 17:49:00",
+ "2021-02-05T17:49:00",
+ "Fri, 05 Feb 2021 17:49:00 +0100",
+ "last year 12:00",
+ ],
+ ["Europe/Berlin", "GMT+12"],
+ [True, False],
+ ),
+)
+def test_date(tmp_path_factory, jpg_img, engine, testdata, timezone, pdfa):
+ # we use the date utility to convert the timestamp from the local
+ # timezone into UTC with the format used by PDF
+ expected = tz2utcstrftime(testdata, "D:%Y%m%d%H%M%SZ", timezone)
+ out_pdf = tmp_path_factory.mktemp("faketime") / "out.pdf"
+ subprocess.check_call(
+ ["env", f"TZ={timezone}", img2pdfprog]
+ + (["--pdfa"] if pdfa else [])
+ + [
+ f"--moddate={testdata}",
+ f"--creationdate={testdata}",
+ "--producer=",
+ "--engine=" + engine,
+ "--output=" + str(out_pdf),
+ str(jpg_img),
+ ]
+ )
+ with pikepdf.open(str(out_pdf)) as p:
+ assert p.docinfo.CreationDate == expected
+ assert p.docinfo.ModDate == expected
+ if pdfa:
+ assert p.Root.Metadata.Subtype == "/XML"
+ assert p.Root.Metadata.Type == "/Metadata"
+ expected = tz2utcstrftime(testdata, "%Y-%m-%dT%H:%M:%SZ", timezone)
+ root = ET.fromstring(p.Root.Metadata.read_bytes())
+ for k in ["ModifyDate", "CreateDate"]:
+ assert (
+ root.find(
+ f".//xmp:{k}", {"xmp": "http://ns.adobe.com/xap/1.0/"}
+ ).text
+ == expected
+ )
+ out_pdf.unlink()
+
+
@pytest.mark.parametrize("engine", ["internal", "pikepdf"])
def test_general(general_input, engine):
inputf = os.path.join(os.path.dirname(__file__), "tests", "input", general_input)
diff --git a/src/jp2.py b/src/jp2.py
index ae54746..44d3e21 100644
--- a/src/jp2.py
+++ b/src/jp2.py
@@ -37,9 +37,8 @@ def getBox(data, byteStart, noBytes):
def parse_ihdr(data):
- height = struct.unpack(">I", data[0:4])[0]
- width = struct.unpack(">I", data[4:8])[0]
- return width, height
+ height, width, channels, bpp = struct.unpack(">IIHB", data[:11])
+ return width, height, channels, bpp + 1
def parse_colr(data):
@@ -59,8 +58,8 @@ def parse_colr(data):
def parse_resc(data):
hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data)
- hdpi = ((hnum / hden) * (10 ** hexp) * 100) / 2.54
- vdpi = ((vnum / vden) * (10 ** vexp) * 100) / 2.54
+ hdpi = ((hnum / hden) * (10**hexp) * 100) / 2.54
+ vdpi = ((vnum / vden) * (10**vexp) * 100) / 2.54
return hdpi, vdpi
@@ -85,13 +84,13 @@ def parse_jp2h(data):
while byteStart < noBytes and boxLengthValue != 0:
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
if boxType == b"ihdr":
- width, height = parse_ihdr(boxContents)
+ width, height, channels, bpp = parse_ihdr(boxContents)
elif boxType == b"colr":
colorspace = parse_colr(boxContents)
elif boxType == b"res ":
hdpi, vdpi = parse_res(boxContents)
byteStart = byteEnd
- return (width, height, colorspace, hdpi, vdpi)
+ return (width, height, colorspace, hdpi, vdpi, channels, bpp)
def parsejp2(data):
@@ -102,7 +101,9 @@ def parsejp2(data):
while byteStart < noBytes and boxLengthValue != 0:
boxLengthValue, boxType, byteEnd, boxContents = getBox(data, byteStart, noBytes)
if boxType == b"jp2h":
- width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
+ width, height, colorspace, hdpi, vdpi, channels, bpp = parse_jp2h(
+ boxContents
+ )
break
byteStart = byteEnd
if not width:
@@ -112,13 +113,41 @@ def parsejp2(data):
if not colorspace:
raise Exception("no colorspace in jp2 header")
# retrieving the dpi is optional so we do not error out if not present
- return (width, height, colorspace, hdpi, vdpi)
+ return (width, height, colorspace, hdpi, vdpi, channels, bpp)
+
+
+def parsej2k(data):
+ lsiz, rsiz, xsiz, ysiz, xosiz, yosiz, _, _, _, _, csiz = struct.unpack(
+ ">HHIIIIIIIIH", data[4:42]
+ )
+ ssiz = [None] * csiz
+ xrsiz = [None] * csiz
+ yrsiz = [None] * csiz
+ for i in range(csiz):
+ ssiz[i], xrsiz[i], yrsiz[i] = struct.unpack(
+ "BBB", data[42 + 3 * i : 42 + 3 * (i + 1)]
+ )
+ assert ssiz == [7, 7, 7]
+ return xsiz - xosiz, ysiz - yosiz, None, None, None, csiz, 8
+
+
+def parse(data):
+ if data[:4] == b"\xff\x4f\xff\x51":
+ return parsej2k(data)
+ else:
+ return parsejp2(data)
if __name__ == "__main__":
import sys
- width, height, colorspace = parsejp2(open(sys.argv[1]).read())
- sys.stdout.write("width = %d" % width)
- sys.stdout.write("height = %d" % height)
- sys.stdout.write("colorspace = %s" % colorspace)
+ width, height, colorspace, hdpi, vdpi, channels, bpp = parse(
+ open(sys.argv[1], "rb").read()
+ )
+ print("width = %d" % width)
+ print("height = %d" % height)
+ print("colorspace = %s" % colorspace)
+ print("hdpi = %s" % hdpi)
+ print("vdpi = %s" % vdpi)
+ print("channels = %s" % channels)
+ print("bpp = %s" % bpp)