From 242e222297a3e2c3f78273645faaf63c4a8e238f Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Tue, 16 Aug 2016 16:35:49 +0200 Subject: Import img2pdf_0.2.1-1.debian.tar.xz [dgit import tarball img2pdf 0.2.1-1 img2pdf_0.2.1-1.debian.tar.xz] --- changelog | 18 ++++++++++++++++++ compat | 1 + control | 40 ++++++++++++++++++++++++++++++++++++++++ copyright | 36 ++++++++++++++++++++++++++++++++++++ img2pdf.install | 1 + img2pdf.manpages | 1 + python3-img2pdf.install | 1 + python3-jp2.substvars | 2 ++ rules | 13 +++++++++++++ source/format | 1 + watch | 4 ++++ 11 files changed, 118 insertions(+) create mode 100644 changelog create mode 100644 compat create mode 100644 control create mode 100644 copyright create mode 100644 img2pdf.install create mode 100644 img2pdf.manpages create mode 100644 python3-img2pdf.install create mode 100644 python3-jp2.substvars create mode 100755 rules create mode 100644 source/format create mode 100644 watch diff --git a/changelog b/changelog new file mode 100644 index 0000000..4f70610 --- /dev/null +++ b/changelog @@ -0,0 +1,18 @@ +img2pdf (0.2.1-1) unstable; urgency=medium + + * New upstream release (closes: #833376) + + -- Johannes Schauer Tue, 16 Aug 2016 16:35:49 +0200 + +img2pdf (0.2.0-2) unstable; urgency=medium + + * Add missing dependency on python3-pkg-resources (Thanks, Sean Whitton for + the diff) (Closes: #818617). + + -- Johannes Schauer Sun, 03 Jul 2016 06:08:37 +0200 + +img2pdf (0.2.0-1) unstable; urgency=medium + + * Initial release. 
(Closes: #742075) + + -- Johannes Schauer Tue, 09 Feb 2016 08:50:55 +0100 diff --git a/compat b/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/compat @@ -0,0 +1 @@ +9 diff --git a/control b/control new file mode 100644 index 0000000..a9bbcfa --- /dev/null +++ b/control @@ -0,0 +1,40 @@ +Source: img2pdf +Maintainer: Johannes Schauer +Section: python +Priority: optional +Build-Depends: dh-python, python3-setuptools, python3-all, debhelper, python3-pil, python3-pdfrw, help2man, python3-pkg-resources +Standards-Version: 3.9.6 +Homepage: https://gitlab.mister-muffin.de/josch/img2pdf + +Package: img2pdf +Architecture: all +Section: utils +Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf +Description: Lossless conversion of raster images to PDF + This program will take a list of raster images and produce a PDF file with the + images embedded in it. JPEG and JPEG2000 images will be included without + recompression. Raster images in other formats will be included with zip/flate + encoding which usually leads to an increase in the resulting size because + formats like png compress better than PDF which just zip/flate compresses the + RGB data. As a result, this tool is able to losslessly wrap images into a PDF + container with a quality to filesize ratio that is typically better (in case + of JPEG and JPEG2000 images) or equal (in case of other formats) than that of + existing tools. + +Package: python3-img2pdf +Architecture: all +Depends: ${misc:Depends}, ${python3:Depends} +Suggests: python3-pdfrw +Description: Lossless conversion of raster images to PDF + This module will take a list of raster images and produce a PDF file with the + images embedded in it. JPEG and JPEG2000 images will be included without + recompression. 
Raster images in other formats will be included with zip/flate + encoding which usually leads to an increase in the resulting size because + formats like png compress better than PDF which just zip/flate compresses the + RGB data. As a result, this module is able to losslessly wrap images into a + PDF container with a quality to filesize ratio that is typically better (in + case of JPEG and JPEG2000 images) or equal (in case of other formats) than + that of existing tools. + . + Img2pdf includes its own PDF writer but will use the pdfrw module if + available instead. diff --git a/copyright b/copyright new file mode 100644 index 0000000..2458201 --- /dev/null +++ b/copyright @@ -0,0 +1,36 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: img2pdf +Source: https://gitlab.mister-muffin.de/josch/img2pdf + +Files: * +Copyright: 2014 Johannes Schauer +License: LGPL-3 + This program is free software; you can redistribute it + and/or modify it under the terms of the Lesser GNU General Public + License as published by the Free Software Foundation version 3. + . + On Debian systems, the full text of the Lesser GNU General Public + License version 3 can be found in the file + `/usr/share/common-licenses/LGPL-3'. + +Files: src/jp2.py +Copyright: + 2014 Johannes Schauer + KB / National Library of the Netherlands, Open Planets Foundation +License: LGPL-3+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + . + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + . 
+ You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see http://www.gnu.org/licenses/. + . + On Debian systems, the full text of the Lesser GNU General Public + License version 3 can be found in the file + `/usr/share/common-licenses/LGPL-3'. diff --git a/img2pdf.install b/img2pdf.install new file mode 100644 index 0000000..2f5f524 --- /dev/null +++ b/img2pdf.install @@ -0,0 +1 @@ +usr/bin/img2pdf diff --git a/img2pdf.manpages b/img2pdf.manpages new file mode 100644 index 0000000..54b8e2b --- /dev/null +++ b/img2pdf.manpages @@ -0,0 +1 @@ +img2pdf.1 diff --git a/python3-img2pdf.install b/python3-img2pdf.install new file mode 100644 index 0000000..6845771 --- /dev/null +++ b/python3-img2pdf.install @@ -0,0 +1 @@ +usr/lib diff --git a/python3-jp2.substvars b/python3-jp2.substvars new file mode 100644 index 0000000..978fc8b --- /dev/null +++ b/python3-jp2.substvars @@ -0,0 +1,2 @@ +misc:Depends= +misc:Pre-Depends= diff --git a/rules b/rules new file mode 100755 index 0000000..43f4ec5 --- /dev/null +++ b/rules @@ -0,0 +1,13 @@ +#!/usr/bin/make -f + +%: + dh $@ --with python3 --buildsystem=pybuild + +override_dh_auto_clean: + rm -rf src/img2pdf.egg-info + rm -f img2pdf.1 + dh_auto_clean + +override_dh_auto_build: + dh_auto_build + help2man --no-info --name="lossless conversion of raster images to pdf" ./src/img2pdf.py -o img2pdf.1 diff --git a/source/format b/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/watch b/watch new file mode 100644 index 0000000..d69d385 --- /dev/null +++ b/watch @@ -0,0 +1,4 @@ +# please also check http://pypi.debian.net/img2pdf/watch +version=3 +opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +http://pypi.debian.net/img2pdf/img2pdf-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) -- cgit v1.2.3 From 65960d846bed2bbc1a83269b715bb78e83e36478 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Tue, 16 Aug 2016 16:35:49 +0200 Subject: Import 
img2pdf_0.2.1.orig.tar.gz [dgit import orig img2pdf_0.2.1.orig.tar.gz] --- MANIFEST.in | 6 + PKG-INFO | 164 +++ README.md | 141 +++ setup.cfg | 8 + setup.py | 42 + src/img2pdf.egg-info/PKG-INFO | 164 +++ src/img2pdf.egg-info/SOURCES.txt | 23 + src/img2pdf.egg-info/dependency_links.txt | 1 + src/img2pdf.egg-info/entry_points.txt | 4 + src/img2pdf.egg-info/pbr.json | 1 + src/img2pdf.egg-info/requires.txt | 1 + src/img2pdf.egg-info/top_level.txt | 2 + src/img2pdf.egg-info/zip-safe | 1 + src/img2pdf.py | 1650 +++++++++++++++++++++++++++++ src/jp2.py | 124 +++ src/tests/__init__.py | 557 ++++++++++ src/tests/input/CMYK.jpg | Bin 0 -> 4788 bytes src/tests/input/normal.jpg | Bin 0 -> 2348 bytes src/tests/input/normal.png | Bin 0 -> 1130 bytes src/tests/output/CMYK.jpg.pdf | Bin 0 -> 5560 bytes src/tests/output/CMYK.tif.pdf | Bin 0 -> 1724 bytes src/tests/output/normal.jpg.pdf | Bin 0 -> 3091 bytes src/tests/output/normal.png.pdf | Bin 0 -> 1573 bytes test_comp.sh | 32 + 24 files changed, 2921 insertions(+) create mode 100644 MANIFEST.in create mode 100644 PKG-INFO create mode 100644 README.md create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 src/img2pdf.egg-info/PKG-INFO create mode 100644 src/img2pdf.egg-info/SOURCES.txt create mode 100644 src/img2pdf.egg-info/dependency_links.txt create mode 100644 src/img2pdf.egg-info/entry_points.txt create mode 100644 src/img2pdf.egg-info/pbr.json create mode 100644 src/img2pdf.egg-info/requires.txt create mode 100644 src/img2pdf.egg-info/top_level.txt create mode 100644 src/img2pdf.egg-info/zip-safe create mode 100755 src/img2pdf.py create mode 100644 src/jp2.py create mode 100644 src/tests/__init__.py create mode 100644 src/tests/input/CMYK.jpg create mode 100644 src/tests/input/normal.jpg create mode 100644 src/tests/input/normal.png create mode 100644 src/tests/output/CMYK.jpg.pdf create mode 100644 src/tests/output/CMYK.tif.pdf create mode 100644 src/tests/output/normal.jpg.pdf create mode 100644 
src/tests/output/normal.png.pdf create mode 100755 test_comp.sh diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..534bab3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include README.md +include test_comp.sh +recursive-include src *.jpg +recursive-include src *.pdf +recursive-include src *.png +recursive-include src *.py diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..b18e9d6 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,164 @@ +Metadata-Version: 1.1 +Name: img2pdf +Version: 0.2.1 +Summary: Convert images to PDF via direct JPEG inclusion. +Home-page: https://gitlab.mister-muffin.de/josch/img2pdf +Author: Johannes 'josch' Schauer +Author-email: josch@mister-muffin.de +License: LGPL +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Description: img2pdf + ======= + + Losslessly convert raster images to PDF. The file size will not unnecessarily + increase. One major application would be a number of scans made in JPEG format + which should now become part of a single PDF document. Existing solutions + would either re-encode the input JPEG files (leading to quality loss) or store + them in the zip/flate format which results into the PDF becoming unnecessarily + large in terms of its file size. + + Background + ---------- + + Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by + embedding them without re-encoding. I wrote this piece of python code. + because I was missing a tool to do this automatically. Img2pdf basically just + wraps JPEG images into the PDF container as they are. + + If you know an existing tool which allows one to embed JPEG and JPEG2000 images + into a PDF container without recompression, please contact me so that I can put + this code into the garbage bin. + + Functionality + ------------- + + This program will take a list of images and produce a PDF file with the images + embedded in it. 
JPEG and JPEG2000 images will be included without + recompression. Raster images in other formats will be included with zip/flate + encoding which usually leads to an increase in the resulting size because + formats like png compress better than PDF which just zip/flate compresses the + RGB data. As a result, this tool is able to losslessly wrap images into a PDF + container with a quality to filesize ratio that is typically better (in case of + JPEG and JPEG2000 images) or equal (in case of other formats) than that of + existing tools. + + For example, imagemagick will re-encode the input JPEG image (thus changing + its content): + + $ convert img.jpg img.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 1.6301e+06 + + If one wants to losslessly convert from any format to PDF with + imagemagick, one has to use zip compression: + + $ convert input.jpg -compress Zip output.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 0 + + However, this approach will result in PDF files that are a few times larger + than the input JPEG or JPEG2000 file. + + img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF + container without additional overhead (aside from the PDF structure itself), + save other graphics formats using lossless zip compression, and produce + multi-page PDF files when more than one input image is given. + + Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf + is several times faster than with other tools. + + Usage + ----- + + The images must be provided as files because img2pdf needs to seek in the file + descriptor. + + If no output file is specified with the `-o`/`--output` option, output will be + done to stdout. 
+ + The detailed documentation can be accessed by running: + + img2pdf --help + + + Bugs + ---- + + If you find a JPEG or JPEG2000 file that, when embedded cannot be read + by the Adobe Acrobat Reader, please contact me. + + For lossless conversion of formats other than JPEG or JPEG2000, zip/flate + encoding is used. This choice is based on tests I did with a number of images. + I converted them into PDF using the lossless variants of the compression + formats offered by imagemagick. In all my tests, zip/flate encoding performed + best. You can verify my findings using the test_comp.sh script with any input + image given as a commandline argument. If you find an input file that is + outperformed by another lossless compression method, contact me. + + I have not yet figured out how to determine the colorspace of JPEG2000 files. + Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with + other colorspaces, you must explicitly specify it using the `--colorspace` + option. + + It might be possible to store transparency using masks but it is not clear + what the utility of such a functionality would be. + + Most vector graphic formats can be losslessly turned into PDF (minus some of + the features unsupported by PDF) but img2pdf will currently turn vector + graphics into their lossy raster representations. For converting raster + graphics to PDF, use another tool like inkscape and then join the resulting + pages with a tool like pdftk. + + A configuration file could be used for default options. + + Installation + ------------ + + On a Debian- and Ubuntu-based systems, dependencies may be installed + with the following command: + + apt-get install python3 python3-pil python3-setuptools + + You can then install the package using: + + $ pip install img2pdf + + If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip install . 
+ + To test the console script without installing the package on your system, + use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip install . + + You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/test.jpg + + The package can also be used as a library: + + import img2pdf + pdf_bytes = img2pdf.convert('test.jpg') + + file = open("name.pdf","wb") + file.write(pdf_bytes) + +Keywords: jpeg pdf converter +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Other Audience +Classifier: Environment :: Console +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) +Classifier: Natural Language :: English +Classifier: Operating System :: OS Independent diff --git a/README.md b/README.md new file mode 100644 index 0000000..27637d6 --- /dev/null +++ b/README.md @@ -0,0 +1,141 @@ +img2pdf +======= + +Losslessly convert raster images to PDF. The file size will not unnecessarily +increase. One major application would be a number of scans made in JPEG format +which should now become part of a single PDF document. Existing solutions +would either re-encode the input JPEG files (leading to quality loss) or store +them in the zip/flate format which results in the PDF becoming unnecessarily +large in terms of its file size. + +Background +---------- + +Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by +embedding them without re-encoding. I wrote this piece of Python code +because I was missing a tool to do this automatically. Img2pdf basically just +wraps JPEG images into the PDF container as they are. 
+ +If you know an existing tool which allows one to embed JPEG and JPEG2000 images +into a PDF container without recompression, please contact me so that I can put +this code into the garbage bin. + +Functionality +------------- + +This program will take a list of images and produce a PDF file with the images +embedded in it. JPEG and JPEG2000 images will be included without +recompression. Raster images in other formats will be included with zip/flate +encoding which usually leads to an increase in the resulting size because +formats like png compress better than PDF which just zip/flate compresses the +RGB data. As a result, this tool is able to losslessly wrap images into a PDF +container with a quality to filesize ratio that is typically better (in case of +JPEG and JPEG2000 images) or equal (in case of other formats) than that of +existing tools. + +For example, imagemagick will re-encode the input JPEG image (thus changing +its content): + + $ convert img.jpg img.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 1.6301e+06 + +If one wants to losslessly convert from any format to PDF with +imagemagick, one has to use zip compression: + + $ convert input.jpg -compress Zip output.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 0 + +However, this approach will result in PDF files that are a few times larger +than the input JPEG or JPEG2000 file. + +img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF +container without additional overhead (aside from the PDF structure itself), +save other graphics formats using lossless zip compression, and produce +multi-page PDF files when more than one input image is given. + +Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf +is several times faster than with other tools. 
+ +Usage +----- + +The images must be provided as files because img2pdf needs to seek in the file +descriptor. + +If no output file is specified with the `-o`/`--output` option, output will be +done to stdout. + +The detailed documentation can be accessed by running: + + img2pdf --help + + +Bugs +---- + +If you find a JPEG or JPEG2000 file that, when embedded, cannot be read +by the Adobe Acrobat Reader, please contact me. + +For lossless conversion of formats other than JPEG or JPEG2000, zip/flate +encoding is used. This choice is based on tests I did with a number of images. +I converted them into PDF using the lossless variants of the compression +formats offered by imagemagick. In all my tests, zip/flate encoding performed +best. You can verify my findings using the test_comp.sh script with any input +image given as a commandline argument. If you find an input file that is +outperformed by another lossless compression method, contact me. + +I have not yet figured out how to determine the colorspace of JPEG2000 files. +Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with +other colorspaces, you must explicitly specify it using the `--colorspace` +option. + +It might be possible to store transparency using masks but it is not clear +what the utility of such a functionality would be. + +Most vector graphic formats can be losslessly turned into PDF (minus some of +the features unsupported by PDF) but img2pdf will currently turn vector +graphics into their lossy raster representations. For converting vector +graphics to PDF, use another tool like inkscape and then join the resulting +pages with a tool like pdftk. + +A configuration file could be used for default options. 
+ +Installation +------------ + +On Debian- and Ubuntu-based systems, dependencies may be installed +with the following command: + + apt-get install python3 python3-pil python3-setuptools + +You can then install the package using: + + $ pip install img2pdf + +If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip install . + +To test the console script without installing the package on your system, +use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip install . + +You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/input/normal.jpg + +The package can also be used as a library: + + import img2pdf + pdf_bytes = img2pdf.convert('test.jpg') + + file = open("name.pdf","wb") + file.write(pdf_bytes) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..8c9157d --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[metadata] +description-file = README.md + +[egg_info] +tag_build = +tag_date = 0 +tag_svn_revision = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..50de1e6 --- /dev/null +++ b/setup.py @@ -0,0 +1,42 @@ +from setuptools import setup + +VERSION = "0.2.1" + +setup( + name='img2pdf', + version=VERSION, + author="Johannes 'josch' Schauer", + author_email='josch@mister-muffin.de', + description="Convert images to PDF via direct JPEG inclusion.", + long_description=open('README.md').read(), + license="LGPL", + keywords="jpeg pdf converter", + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Other Audience', + 'Environment :: Console', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: Implementation :: CPython', + 'License :: OSI Approved :: GNU Lesser General Public License v3 ' + '(LGPLv3)', + 'Natural Language :: English', + 'Operating System :: OS Independent'], + 
url='https://gitlab.mister-muffin.de/josch/img2pdf', + download_url='https://gitlab.mister-muffin.de/josch/img2pdf/repository/' + 'archive.tar.gz?ref=' + VERSION, + package_dir={"": "src"}, + py_modules=['img2pdf', 'jp2'], + include_package_data=True, + test_suite='tests.test_suite', + zip_safe=True, + install_requires=( + 'Pillow', + ), + entry_points=''' + [console_scripts] + img2pdf = img2pdf:main + ''', + ) diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO new file mode 100644 index 0000000..b18e9d6 --- /dev/null +++ b/src/img2pdf.egg-info/PKG-INFO @@ -0,0 +1,164 @@ +Metadata-Version: 1.1 +Name: img2pdf +Version: 0.2.1 +Summary: Convert images to PDF via direct JPEG inclusion. +Home-page: https://gitlab.mister-muffin.de/josch/img2pdf +Author: Johannes 'josch' Schauer +Author-email: josch@mister-muffin.de +License: LGPL +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Description: img2pdf + ======= + + Losslessly convert raster images to PDF. The file size will not unnecessarily + increase. One major application would be a number of scans made in JPEG format + which should now become part of a single PDF document. Existing solutions + would either re-encode the input JPEG files (leading to quality loss) or store + them in the zip/flate format which results into the PDF becoming unnecessarily + large in terms of its file size. + + Background + ---------- + + Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by + embedding them without re-encoding. I wrote this piece of python code. + because I was missing a tool to do this automatically. Img2pdf basically just + wraps JPEG images into the PDF container as they are. + + If you know an existing tool which allows one to embed JPEG and JPEG2000 images + into a PDF container without recompression, please contact me so that I can put + this code into the garbage bin. 
+ + Functionality + ------------- + + This program will take a list of images and produce a PDF file with the images + embedded in it. JPEG and JPEG2000 images will be included without + recompression. Raster images in other formats will be included with zip/flate + encoding which usually leads to an increase in the resulting size because + formats like png compress better than PDF which just zip/flate compresses the + RGB data. As a result, this tool is able to losslessly wrap images into a PDF + container with a quality to filesize ratio that is typically better (in case of + JPEG and JPEG2000 images) or equal (in case of other formats) than that of + existing tools. + + For example, imagemagick will re-encode the input JPEG image (thus changing + its content): + + $ convert img.jpg img.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 1.6301e+06 + + If one wants to losslessly convert from any format to PDF with + imagemagick, one has to use zip compression: + + $ convert input.jpg -compress Zip output.pdf + $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression + $ compare -metric AE img.jpg img.extr-000.ppm null: + 0 + + However, this approach will result in PDF files that are a few times larger + than the input JPEG or JPEG2000 file. + + img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF + container without additional overhead (aside from the PDF structure itself), + save other graphics formats using lossless zip compression, and produce + multi-page PDF files when more than one input image is given. + + Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf + is several times faster than with other tools. + + Usage + ----- + + The images must be provided as files because img2pdf needs to seek in the file + descriptor. 
+ + If no output file is specified with the `-o`/`--output` option, output will be + done to stdout. + + The detailed documentation can be accessed by running: + + img2pdf --help + + + Bugs + ---- + + If you find a JPEG or JPEG2000 file that, when embedded cannot be read + by the Adobe Acrobat Reader, please contact me. + + For lossless conversion of formats other than JPEG or JPEG2000, zip/flate + encoding is used. This choice is based on tests I did with a number of images. + I converted them into PDF using the lossless variants of the compression + formats offered by imagemagick. In all my tests, zip/flate encoding performed + best. You can verify my findings using the test_comp.sh script with any input + image given as a commandline argument. If you find an input file that is + outperformed by another lossless compression method, contact me. + + I have not yet figured out how to determine the colorspace of JPEG2000 files. + Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with + other colorspaces, you must explicitly specify it using the `--colorspace` + option. + + It might be possible to store transparency using masks but it is not clear + what the utility of such a functionality would be. + + Most vector graphic formats can be losslessly turned into PDF (minus some of + the features unsupported by PDF) but img2pdf will currently turn vector + graphics into their lossy raster representations. For converting raster + graphics to PDF, use another tool like inkscape and then join the resulting + pages with a tool like pdftk. + + A configuration file could be used for default options. + + Installation + ------------ + + On a Debian- and Ubuntu-based systems, dependencies may be installed + with the following command: + + apt-get install python3 python3-pil python3-setuptools + + You can then install the package using: + + $ pip install img2pdf + + If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip install . 
+ + To test the console script without installing the package on your system, + use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip install . + + You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/test.jpg + + The package can also be used as a library: + + import img2pdf + pdf_bytes = img2pdf.convert('test.jpg') + + file = open("name.pdf","wb") + file.write(pdf_bytes) + +Keywords: jpeg pdf converter +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Other Audience +Classifier: Environment :: Console +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) +Classifier: Natural Language :: English +Classifier: Operating System :: OS Independent diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt new file mode 100644 index 0000000..192589d --- /dev/null +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -0,0 +1,23 @@ +MANIFEST.in +README.md +setup.cfg +setup.py +test_comp.sh +src/img2pdf.py +src/jp2.py +src/img2pdf.egg-info/PKG-INFO +src/img2pdf.egg-info/SOURCES.txt +src/img2pdf.egg-info/dependency_links.txt +src/img2pdf.egg-info/entry_points.txt +src/img2pdf.egg-info/pbr.json +src/img2pdf.egg-info/requires.txt +src/img2pdf.egg-info/top_level.txt +src/img2pdf.egg-info/zip-safe +src/tests/__init__.py +src/tests/input/CMYK.jpg +src/tests/input/normal.jpg +src/tests/input/normal.png +src/tests/output/CMYK.jpg.pdf +src/tests/output/CMYK.tif.pdf +src/tests/output/normal.jpg.pdf +src/tests/output/normal.png.pdf \ No newline at end of file diff --git a/src/img2pdf.egg-info/dependency_links.txt b/src/img2pdf.egg-info/dependency_links.txt new file mode 100644 index 
0000000..8b13789 --- /dev/null +++ b/src/img2pdf.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/img2pdf.egg-info/entry_points.txt b/src/img2pdf.egg-info/entry_points.txt new file mode 100644 index 0000000..59301dc --- /dev/null +++ b/src/img2pdf.egg-info/entry_points.txt @@ -0,0 +1,4 @@ + + [console_scripts] + img2pdf = img2pdf:main + \ No newline at end of file diff --git a/src/img2pdf.egg-info/pbr.json b/src/img2pdf.egg-info/pbr.json new file mode 100644 index 0000000..bc27bf9 --- /dev/null +++ b/src/img2pdf.egg-info/pbr.json @@ -0,0 +1 @@ +{"is_release": false, "git_version": "d78b2cb"} \ No newline at end of file diff --git a/src/img2pdf.egg-info/requires.txt b/src/img2pdf.egg-info/requires.txt new file mode 100644 index 0000000..7e2fba5 --- /dev/null +++ b/src/img2pdf.egg-info/requires.txt @@ -0,0 +1 @@ +Pillow diff --git a/src/img2pdf.egg-info/top_level.txt b/src/img2pdf.egg-info/top_level.txt new file mode 100644 index 0000000..0636fd7 --- /dev/null +++ b/src/img2pdf.egg-info/top_level.txt @@ -0,0 +1,2 @@ +img2pdf +jp2 diff --git a/src/img2pdf.egg-info/zip-safe b/src/img2pdf.egg-info/zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/img2pdf.egg-info/zip-safe @@ -0,0 +1 @@ + diff --git a/src/img2pdf.py b/src/img2pdf.py new file mode 100755 index 0000000..2042d13 --- /dev/null +++ b/src/img2pdf.py @@ -0,0 +1,1650 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2012-2014 Johannes 'josch' Schauer +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation, either +# version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
def parse(cont, indent=1):
    """Serialize a Python value into its PDF object syntax as bytes.

    Handled values, checked in this order:
      * dict (exact type)  -> ``<< ... >>`` dictionary, one sorted entry per
        line, nested four spaces per ``indent`` level
      * int (exact type)   -> decimal digits
      * float (exact type) -> integer form when the value is whole, otherwise
        four decimals with trailing zeros stripped
      * MyPdfDict          -> ``N 0 R`` reference when the object carries an
        ``identifier`` attribute, otherwise its content is written inline
      * bytes              -> returned unchanged
      * list               -> ``[ a b c ]``

    Raises TypeError for str and for any other unsupported type.
    """
    kind = type(cont)

    if kind is dict:
        entries = [
            4 * indent * b" " + key + b" " + parse(value, indent + 1)
            for key, value in sorted(cont.items())
        ]
        closing = 4 * (indent - 1) * b" " + b">>"
        return b"<<\n" + b"\n".join(entries) + b"\n" + closing

    if kind is int:
        return str(cont).encode()

    if kind is float:
        # whole-valued floats are emitted as plain integers
        if int(cont) == cont:
            return parse(int(cont))
        return ("%0.4f" % cont).rstrip("0").encode()

    if isinstance(cont, MyPdfDict):
        # an object that has an identifier is written as an indirect
        # reference; one without is embedded in place
        if hasattr(cont, "identifier"):
            return ("%d 0 R" % cont.identifier).encode()
        return parse(cont.content, indent)

    if kind is str:
        raise TypeError(
            "parse must be passed a bytes object in py3. Got: %s" % cont)

    if isinstance(cont, bytes):
        return cont

    if isinstance(cont, list):
        return b"[ " + b" ".join([parse(item, indent) for item in cont]) + b" ]"

    raise TypeError("cannot handle type %s with content %s" % (type(cont),
                                                               cont))
MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0) + self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog) + self.version = version # default pdf version 1.3 + self.pagearray = [] + + def addobj(self, obj): + newid = len(self.objects)+1 + obj.identifier = newid + self.objects.append(obj) + + def tostream(self, info, stream): + xreftable = list() + + # justification of the random binary garbage in the header from + # adobe: + # + # > Note: If a PDF file contains binary data, as most do (see Section + # > 3.1, “Lexical Conventions”), it is recommended that the header + # > line be immediately followed by a comment line containing at + # > least four binary characters—that is, characters whose codes are + # > 128 or greater. This ensures proper behavior of file transfer + # > applications that inspect data near the beginning of a file to + # > determine whether to treat the file’s contents as text or as + # > binary. + # + # the choice of binary characters is arbitrary but those four seem to + # be used elsewhere. + pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii') + pdfheader += b'%\xe2\xe3\xcf\xd3\n' + stream.write(pdfheader) + + # From section 3.4.3 of the PDF Reference (version 1.7): + # + # > Each entry is exactly 20 bytes long, including the end-of-line + # > marker. + # > + # > [...] + # > + # > The format of an in-use entry is + # > nnnnnnnnnn ggggg n eol + # > where + # > nnnnnnnnnn is a 10-digit byte offset + # > ggggg is a 5-digit generation number + # > n is a literal keyword identifying this as an in-use entry + # > eol is a 2-character end-of-line sequence + # > + # > [...] 
class MyPdfString():
    """Encoder for PDF literal string objects, used when pdfrw's PdfString
    is not available."""

    @classmethod
    def encode(cls, string):
        """Return ``string`` as a PDF literal string ``( ... )`` in bytes.

        Pure ASCII text is written as-is; anything else is written as
        UTF-16BE preceded by the byte order mark ``FE FF``.

        Backslashes, parentheses and carriage returns are escaped. The
        escaping is done on the encoded bytes, so it also covers UTF-16BE
        code units that happen to contain one of those byte values.
        """
        try:
            raw = string.encode('ascii')
        except UnicodeEncodeError:
            raw = b"\xfe\xff" + string.encode("utf-16-be")
        # the backslash has to be escaped first, otherwise the backslashes
        # introduced by the following replacements would be doubled
        raw = raw.replace(b'\\', b'\\\\')
        raw = raw.replace(b'(', b'\\(')
        raw = raw.replace(b')', b'\\)')
        # an unescaped CR byte inside a literal string is read back as LF by
        # PDF consumers, which silently alters the text (and corrupts
        # UTF-16BE code units whose high or low byte is 0x0D)
        raw = raw.replace(b'\r', b'\\r')
        return b'(' + raw + b')'
MyPdfWriter + PdfDict = MyPdfDict + PdfName = MyPdfName + PdfString = MyPdfString + self.with_pdfrw = False + + now = datetime.now() + self.info = PdfDict(indirect=True) + + def datetime_to_pdfdate(dt): + return dt.strftime("%Y%m%d%H%M%SZ") + + if title is not None: + self.info[PdfName.Title] = PdfString.encode(title) + if author is not None: + self.info[PdfName.Author] = PdfString.encode(author) + if creator is not None: + self.info[PdfName.Creator] = PdfString.encode(creator) + if producer is not None: + self.info[PdfName.Producer] = PdfString.encode(producer) + if creationdate is not None: + self.info[PdfName.CreationDate] = \ + PdfString.encode("D:"+datetime_to_pdfdate(creationdate)) + elif not nodate: + self.info[PdfName.CreationDate] = \ + PdfString.encode("D:"+datetime_to_pdfdate(now)) + if moddate is not None: + self.info[PdfName.ModDate] = \ + PdfString.encode("D:"+datetime_to_pdfdate(moddate)) + elif not nodate: + self.info[PdfName.ModDate] = PdfString.encode( + "D:"+datetime_to_pdfdate(now)) + if subject is not None: + self.info[PdfName.Subject] = PdfString.encode(subject) + if keywords is not None: + self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords)) + + self.writer = PdfWriter() + self.writer.version = version + # this is done because pdfrw adds info, catalog and pages as the first + # three objects in this order + if not self.with_pdfrw: + self.writer.addobj(self.info) + self.writer.addobj(self.writer.catalog) + self.writer.addobj(self.writer.pages) + + self.panes = panes + self.initial_page = initial_page + self.magnification = magnification + self.page_layout = page_layout + self.fit_window = fit_window + self.center_window = center_window + self.fullscreen = fullscreen + + def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata, + imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, + pageheight): + if self.with_pdfrw: + from pdfrw import PdfDict, PdfName + from pdfrw.py23_diffs import convert_load + else: + 
PdfDict = MyPdfDict + PdfName = MyPdfName + convert_load = my_convert_load + + if color == Colorspace.L: + colorspace = PdfName.DeviceGray + elif color == Colorspace.RGB: + colorspace = PdfName.DeviceRGB + elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']: + colorspace = PdfName.DeviceCMYK + else: + raise UnsupportedColorspaceError("unsupported color space: %s" + % color.name) + + # either embed the whole jpeg or deflate the bitmap representation + if imgformat is ImageFormat.JPEG: + ofilter = [PdfName.DCTDecode] + elif imgformat is ImageFormat.JPEG2000: + ofilter = [PdfName.JPXDecode] + self.writer.version = "1.5" # jpeg2000 needs pdf 1.5 + else: + ofilter = [PdfName.FlateDecode] + + image = PdfDict(stream=convert_load(imgdata)) + + image[PdfName.Type] = PdfName.XObject + image[PdfName.Subtype] = PdfName.Image + image[PdfName.Filter] = ofilter + image[PdfName.Width] = imgwidthpx + image[PdfName.Height] = imgheightpx + image[PdfName.ColorSpace] = colorspace + # hardcoded as PIL doesn't provide bits for non-jpeg formats + image[PdfName.BitsPerComponent] = 8 + + if color == Colorspace['CMYK;I']: + # Inverts all four channels + image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0] + + text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % + (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii") + + content = PdfDict(stream=convert_load(text)) + resources = PdfDict(XObject=PdfDict(Im0=image)) + + page = PdfDict(indirect=True) + page[PdfName.Type] = PdfName.Page + page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight] + page[PdfName.Resources] = resources + page[PdfName.Contents] = content + + self.writer.addpage(page) + + if not self.with_pdfrw: + self.writer.addobj(content) + self.writer.addobj(image) + + def tostring(self): + stream = BytesIO() + self.tostream(stream) + return stream.getvalue() + + def tostream(self, outputstream): + if self.with_pdfrw: + from pdfrw import PdfDict, PdfName, PdfArray, PdfObject + else: + PdfDict = 
MyPdfDict + PdfName = MyPdfName + PdfObject = MyPdfObject + PdfArray = MyPdfArray + NullObject = PdfObject('null') + TrueObject = PdfObject('true') + + # We fill the catalog with more information like /ViewerPreferences, + # /PageMode, /PageLayout or /OpenAction because the latter refers to a + # page object which has to be present so that we can get its id. + # + # Furthermore, if using pdfrw, the trailer is cleared every time a page + # is added, so we can only start using it after all pages have been + # written. + + if self.with_pdfrw: + catalog = self.writer.trailer.Root + else: + catalog = self.writer.catalog + + if self.fullscreen or self.fit_window or self.center_window or \ + self.panes is not None: + catalog[PdfName.ViewerPreferences] = PdfDict() + + if self.fullscreen: + # this setting might be overwritten later by the page mode + catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ + = PdfName.UseNone + + if self.panes == PageMode.thumbs: + catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ + = PdfName.UseThumbs + # this setting might be overwritten later if fullscreen + catalog[PdfName.PageMode] = PdfName.UseThumbs + elif self.panes == PageMode.outlines: + catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ + = PdfName.UseOutlines + # this setting might be overwritten later if fullscreen + catalog[PdfName.PageMode] = PdfName.UseOutlines + elif self.panes in [PageMode.none, None]: + pass + else: + raise ValueError("unknown page mode: %s" % self.panes) + + if self.fit_window: + catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject + + if self.center_window: + catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = \ + TrueObject + + if self.fullscreen: + catalog[PdfName.PageMode] = PdfName.FullScreen + + # see table 8.2 in section 8.2.1 in + # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf + # Fit - Fits the page to the window. 
+ # FitH - Fits the width of the page to the window. + # FitV - Fits the height of the page to the window. + # FitR - Fits the rectangle specified by the four coordinates to the + # window. + # FitB - Fits the page bounding box to the window. This basically + # reduces the amount of whitespace (margins) that is displayed + # and thus focussing more on the text content. + # FitBH - Fits the width of the page bounding box to the window. + # FitBV - Fits the height of the page bounding box to the window. + + # by default the initial page is the first one + initial_page = self.writer.pagearray[0] + # we set the open action here to make sure we open on the requested + # initial page but this value might be overwritten by a custom open + # action later while still taking the requested initial page into + # account + if self.initial_page is not None: + initial_page = self.writer.pagearray[self.initial_page - 1] + catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.XYZ, + NullObject, NullObject, 0]) + + if self.magnification == Magnification.fit: + catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit]) + elif self.magnification == Magnification.fith: + pagewidth = initial_page[PdfName.MediaBox][2] + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.FitH, pagewidth]) + elif self.magnification == Magnification.fitbh: + # quick hack to determine the image width on the page + imgwidth = float(initial_page[PdfName.Contents].stream.split()[4]) + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.FitBH, imgwidth]) + elif isinstance(self.magnification, float): + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.XYZ, NullObject, NullObject, + self.magnification]) + elif self.magnification is None: + pass + else: + raise ValueError("unknown magnification: %s" % self.magnification) + + if self.page_layout == PageLayout.single: + catalog[PdfName.PageLayout] = PdfName.SinglePage + elif self.page_layout == 
PageLayout.onecolumn: + catalog[PdfName.PageLayout] = PdfName.OneColumn + elif self.page_layout == PageLayout.twocolumnright: + catalog[PdfName.PageLayout] = PdfName.TwoColumnRight + elif self.page_layout == PageLayout.twocolumnleft: + catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft + elif self.page_layout is None: + pass + else: + raise ValueError("unknown page layout: %s" % self.page_layout) + + # now write out the PDF + if self.with_pdfrw: + self.writer.trailer.Info = self.info + self.writer.write(outputstream) + else: + self.writer.tostream(self.info, outputstream) + + +def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): + if imgformat == ImageFormat.JPEG2000 \ + and rawdata is not None and imgdata is None: + # this codepath gets called if the PIL installation is not able to + # handle JPEG2000 files + imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata) + + if hdpi is None: + hdpi = default_dpi + if vdpi is None: + vdpi = default_dpi + ndpi = (hdpi, vdpi) + else: + imgwidthpx, imgheightpx = imgdata.size + + ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi)) + # In python3, the returned dpi value for some tiff images will + # not be an integer but a float. To make the behaviour of + # img2pdf the same between python2 and python3, we convert that + # float into an integer by rounding. + # Search online for the 72.009 dpi problem for more info. + ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) + ics = imgdata.mode + + logging.debug("input dpi = %d x %d", *ndpi) + + if colorspace: + color = colorspace + logging.debug("input colorspace (forced) = %s", color) + else: + color = None + for c in Colorspace: + if c.name == ics: + color = c + if color is None: + color = Colorspace.other + if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG: + # Adobe inverts CMYK JPEGs for some reason, and others + # have followed suit as well. 
def read_images(rawdata, colorspace, first_frame_only=False):
    """Decode one input image into a list of per-page image tuples.

    rawdata          -- the complete image file content as bytes
    colorspace       -- forced Colorspace member, or a false value to detect
                        it from the image
    first_frame_only -- when true, only the first frame of a multi-frame
                        image is converted

    Returns a list of tuples
    ``(color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx)``.
    JPEG and JPEG2000 input yields exactly one tuple whose ``imgdata`` is the
    unmodified ``rawdata``; every other format yields one tuple per frame
    with the zlib-compressed pixel data.

    Raises ImageOpenError if PIL cannot open the data and it does not start
    with the JPEG2000 signature, JpegColorspaceError for JPEG data reported
    with a monochrome, palette or alpha colorspace, and ValueError for a
    colorspace that is not handled.
    """
    im = BytesIO(rawdata)
    imgdata = None
    try:
        imgdata = Image.open(im)
    except IOError as e:
        # test if it is a jpeg2000 image
        # rawdata is bytes, so the signature must be a bytes literal: a str
        # literal never compares equal and would reject every JPEG2000 file
        if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
            raise ImageOpenError("cannot read input image (not jpeg2000). "
                                 "PIL: error reading image: %s" % e) from e
        # image is jpeg2000
        imgformat = ImageFormat.JPEG2000
    else:
        imgformat = None
        for f in ImageFormat:
            if f.name == imgdata.format:
                imgformat = f
        if imgformat is None:
            imgformat = ImageFormat.other

    logging.debug("imgformat = %s", imgformat.name)

    # depending on the input format, determine whether to pass the raw
    # image or the zlib compressed color information
    if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
        # imgdata is None here when PIL could not open a JPEG2000 file; the
        # metadata is then taken from rawdata instead
        color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
            imgdata, imgformat, default_dpi, colorspace, rawdata)
        if color == Colorspace['1']:
            raise JpegColorspaceError("jpeg can't be monochrome")
        if color == Colorspace['P']:
            raise JpegColorspaceError("jpeg can't have a color palette")
        if color == Colorspace['RGBA']:
            raise JpegColorspaceError("jpeg can't have an alpha channel")
        im.close()
        return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx)]
    else:
        result = []
        img_page_count = 0
        # loop through all frames of the image (example: multipage TIFF)
        while True:
            try:
                imgdata.seek(img_page_count)
            except EOFError:
                break

            if first_frame_only and img_page_count > 0:
                break

            logging.debug("Converting frame: %d", img_page_count)

            color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
                imgdata, imgformat, default_dpi, colorspace)

            # because we do not support /CCITTFaxDecode
            if color == Colorspace['1']:
                logging.debug("Converting colorspace 1 to L")
                newimg = imgdata.convert('L')
                color = Colorspace.L
            elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
                           Colorspace["CMYK;I"]]:
                logging.debug("Colorspace is OK: %s", color)
                newimg = imgdata
            elif color in [Colorspace.RGBA, Colorspace.P, Colorspace.other]:
                logging.debug("Converting colorspace %s to RGB", color)
                newimg = imgdata.convert('RGB')
                color = Colorspace.RGB
            else:
                raise ValueError("unknown colorspace: %s" % color.name)
            imggz = zlib.compress(newimg.tobytes())
            result.append((color, ndpi, imgformat, imggz, imgwidthpx,
                           imgheightpx))
            img_page_count += 1
        # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
        # close() method
        try:
            imgdata.close()
        except AttributeError:
            pass
        im.close()
        return result
fitheight is not None and fitheight < 0: + raise ValueError("cannot fit into a rectangle where both " + "dimensions are negative") + elif fit not in [FitMode.fill, FitMode.enlarge] and \ + ((fitwidth is not None and fitwidth < 0) or + (fitheight is not None and fitheight < 0)): + raise Exception("cannot fit into a rectangle where either " + "dimensions are negative") + + def default(): + if fitwidth is not None and fitheight is not None: + newimgwidth = fitwidth + newimgheight = (newimgwidth * imgheight)/imgwidth + if newimgheight > fitheight: + newimgheight = fitheight + newimgwidth = (newimgheight * imgwidth)/imgheight + elif fitwidth is None and fitheight is not None: + newimgheight = fitheight + newimgwidth = (newimgheight * imgwidth)/imgheight + elif fitheight is None and fitwidth is not None: + newimgwidth = fitwidth + newimgheight = (newimgwidth * imgheight)/imgwidth + else: + raise ValueError("fitwidth and fitheight cannot both be None") + return newimgwidth, newimgheight + if fit is None or fit == FitMode.into: + return default() + elif fit == FitMode.fill: + if fitwidth is not None and fitheight is not None: + newimgwidth = fitwidth + newimgheight = (newimgwidth * imgheight)/imgwidth + if newimgheight < fitheight: + newimgheight = fitheight + newimgwidth = (newimgheight * imgwidth)/imgheight + elif fitwidth is None and fitheight is not None: + newimgheight = fitheight + newimgwidth = (newimgheight * imgwidth)/imgheight + elif fitheight is None and fitwidth is not None: + newimgwidth = fitwidth + newimgheight = (newimgwidth * imgheight)/imgwidth + else: + raise ValueError("fitwidth and fitheight cannot both be None") + return newimgwidth, newimgheight + elif fit == FitMode.exact: + if fitwidth is not None and fitheight is not None: + return fitwidth, fitheight + elif fitwidth is None and fitheight is not None: + newimgheight = fitheight + newimgwidth = (newimgheight * imgwidth)/imgheight + elif fitheight is None and fitwidth is not None: + newimgwidth = 
fitwidth + newimgheight = (newimgwidth * imgheight)/imgwidth + else: + raise ValueError("fitwidth and fitheight cannot both be None") + return newimgwidth, newimgheight + elif fit == FitMode.shrink: + if fitwidth is not None and fitheight is not None: + if imgwidth <= fitwidth and imgheight <= fitheight: + return imgwidth, imgheight + elif fitwidth is None and fitheight is not None: + if imgheight <= fitheight: + return imgwidth, imgheight + elif fitheight is None and fitwidth is not None: + if imgwidth <= fitwidth: + return imgwidth, imgheight + else: + raise ValueError("fitwidth and fitheight cannot both be None") + return default() + elif fit == FitMode.enlarge: + if fitwidth is not None and fitheight is not None: + if imgwidth > fitwidth or imgheight > fitheight: + return imgwidth, imgheight + elif fitwidth is None and fitheight is not None: + if imgheight > fitheight: + return imgwidth, imgheight + elif fitheight is None and fitwidth is not None: + if imgwidth > fitwidth: + return imgwidth, imgheight + else: + raise ValueError("fitwidth and fitheight cannot both be None") + return default() + else: + raise NotImplementedError + # if no layout arguments are given, then the image size is equal to the + # page size and will be drawn with the default dpi + if pagesize is None and imgsize is None and border is None: + return default_layout_fun + if pagesize is None and imgsize is None and border is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): + imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0]) + imgheightpdf = px_to_pt(imgheightpx, ndpi[1]) + pagewidth = imgwidthpdf+2*border[1] + pageheight = imgheightpdf+2*border[0] + return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun + if border is None: + border = (0, 0) + # if the pagesize is given but the imagesize is not, then the imagesize + # will be calculated from the pagesize, taking into account the border + # and the fitting + if pagesize is not None and imgsize is None: + def 
layout_fun(imgwidthpx, imgheightpx, ndpi): + if pagesize[0] is not None and pagesize[1] is not None and \ + auto_orient and \ + ((imgwidthpx > imgheightpx and + pagesize[0] < pagesize[1]) or + (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])): + pagewidth, pageheight = pagesize[1], pagesize[0] + newborder = border[1], border[0] + else: + pagewidth, pageheight = pagesize[0], pagesize[1] + newborder = border + if pagewidth is not None: + fitwidth = pagewidth-2*newborder[1] + else: + fitwidth = None + if pageheight is not None: + fitheight = pageheight-2*newborder[0] + else: + fitheight = None + if fit in [FitMode.fill, FitMode.enlarge] and \ + fitwidth is not None and fitwidth < 0 and \ + fitheight is not None and fitheight < 0: + raise NegativeDimensionError( + "at least one border dimension musts be smaller than half " + "the respective page dimension") + elif fit not in [FitMode.fill, FitMode.enlarge] \ + and ((fitwidth is not None and fitwidth < 0) or + (fitheight is not None and fitheight < 0)): + raise NegativeDimensionError( + "one border dimension is larger than half of the " + "respective page dimension") + imgwidthpdf, imgheightpdf = \ + fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + fitwidth, fitheight) + if pagewidth is None: + pagewidth = imgwidthpdf+border[1]*2 + if pageheight is None: + pageheight = imgheightpdf+border[0]*2 + return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun + + def scale_imgsize(s, px, dpi): + if s is None: + return None + mode, value = s + if mode == ImgSize.abs: + return value + if mode == ImgSize.perc: + return (px_to_pt(px, dpi)*value)/100 + if mode == ImgSize.dpi: + return px_to_pt(px, value) + raise NotImplementedError + if pagesize is None and imgsize is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): + imgwidthpdf, imgheightpdf = \ + fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + scale_imgsize(imgsize[0], imgwidthpx, 
ndpi[0]), + scale_imgsize(imgsize[1], imgheightpx, ndpi[1])) + pagewidth = imgwidthpdf+2*border[1] + pageheight = imgheightpdf+2*border[0] + return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun + if pagesize is not None and imgsize is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): + if pagesize[0] is not None and pagesize[1] is not None and \ + auto_orient and \ + ((imgwidthpx > imgheightpx and + pagesize[0] < pagesize[1]) or + (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])): + pagewidth, pageheight = pagesize[1], pagesize[0] + else: + pagewidth, pageheight = pagesize[0], pagesize[1] + imgwidthpdf, imgheightpdf = \ + fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), + scale_imgsize(imgsize[1], imgheightpx, ndpi[1])) + return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun + raise NotImplementedError + + +def default_layout_fun(imgwidthpx, imgheightpx, ndpi): + imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0]) + imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1]) + return pagewidth, pageheight, imgwidthpdf, imgheightpdf + + +def get_fixed_dpi_layout_fun(fixed_dpi): + """Layout function that overrides whatever DPI is claimed in input images. + + >>> layout_fun = get_fixed_dpi_layout_fun((300, 300)) + >>> convert(image1, layout_fun=layout_fun, ... outputstream=...) + """ + def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi): + return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi) + return fixed_dpi_layout_fun + + +# given one or more input image, depending on outputstream, either return a +# string containing the whole PDF if outputstream is None or write the PDF +# data to the given file-like object and return None +# +# Input images can be given as file like objects (they must implement read()), +# as a binary string representing the image content or as filenames to the +# images. 
def convert(*images, title=None,
            author=None, creator=None, producer=None, creationdate=None,
            moddate=None, subject=None, keywords=None, colorspace=None,
            nodate=False, layout_fun=default_layout_fun, viewer_panes=None,
            viewer_initial_page=None, viewer_magnification=None,
            viewer_page_layout=None, viewer_fit_window=False,
            viewer_center_window=False, viewer_fullscreen=False,
            with_pdfrw=True, outputstream=None, first_frame_only=False):
    """Build a PDF with one page per input image (or image frame).

    Each positional argument may be an object with a ``read()`` method, a
    file name, or the raw image content itself; they are tried in that
    order.

    ``layout_fun`` is called as ``layout_fun(imgwidthpx, imgheightpx, ndpi)``
    and must return ``(pagewidth, pageheight, imgwidthpdf, imgheightpdf)``.
    The image is always centered on the page. The remaining keyword
    arguments are handed to ``pdfdoc`` (document information and viewer
    settings) and ``read_images`` (``colorspace``, ``first_frame_only``).

    Returns the PDF as bytes when ``outputstream`` is None; otherwise the PDF
    is written to ``outputstream`` and None is returned.

    Raises PdfTooLargeError when a page dimension exceeds 14400 units.
    """
    pdf = pdfdoc("1.3", title, author, creator, producer, creationdate,
                 moddate, subject, keywords, nodate, viewer_panes,
                 viewer_initial_page, viewer_magnification, viewer_page_layout,
                 viewer_fit_window, viewer_center_window, viewer_fullscreen,
                 with_pdfrw)

    for img in images:
        # img is allowed to be a path, a binary string representing image data
        # or a file-like object (really anything that implements read())
        try:
            rawdata = img.read()
        except AttributeError:
            # the thing doesn't have a read() function, so try if we can treat
            # it as a file name
            try:
                with open(img, "rb") as f:
                    rawdata = f.read()
            except Exception:
                # whatever the error is (string could contain NUL
                # characters or the path could just not exist) it's not a file
                # name so we now try treating it as raw image content.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit are not swallowed here.
                rawdata = img

        for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx \
                in read_images(rawdata, colorspace, first_frame_only):
            pagewidth, pageheight, imgwidthpdf, imgheightpdf = \
                layout_fun(imgwidthpx, imgheightpx, ndpi)
            if pagewidth < 3.00 or pageheight < 3.00:
                logging.warning("pdf width or height is below 3.00 - too "
                                "small for some viewers!")
            elif pagewidth > 14400.0 or pageheight > 14400.0:
                raise PdfTooLargeError(
                        "pdf width or height must not exceed 200 inches.")
            # the image is always centered on the page
            imgxpdf = (pagewidth - imgwidthpdf)/2.0
            imgypdf = (pageheight - imgheightpdf)/2.0
            pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat,
                              imgdata, imgwidthpdf, imgheightpdf, imgxpdf,
                              imgypdf, pagewidth, pageheight)

    # compare against None instead of relying on truthiness so that a
    # stream object which evaluates to false is still written to
    if outputstream is not None:
        pdf.tostream(outputstream)
        return

    return pdf.tostring()
ImgUnit.inch: + num = (ImgSize.abs, in_to_pt(num)) + elif unit == ImgUnit.pt: + num = (ImgSize.abs, num) + elif unit == ImgUnit.dpi: + num = (ImgSize.dpi, num) + elif unit == ImgUnit.perc: + num = (ImgSize.perc, num) + return num + + +def parse_pagesize_rectarg(string): + transposed = string.endswith("^T") + if transposed: + string = string[:-2] + if papersizes.get(string.lower()): + string = papersizes[string.lower()] + if 'x' not in string: + # if there is no separating "x" in the string, then the string is + # interpreted as the width + w = parse_num(string, "width") + h = None + else: + w, h = string.split('x', 1) + w = parse_num(w, "width") + h = parse_num(h, "height") + if transposed: + w, h = h, w + if w is None and h is None: + raise argparse.ArgumentTypeError("at least one dimension must be " + "specified") + return w, h + + +def parse_imgsize_rectarg(string): + transposed = string.endswith("^T") + if transposed: + string = string[:-2] + if papersizes.get(string.lower()): + string = papersizes[string.lower()] + if 'x' not in string: + # if there is no separating "x" in the string, then the string is + # interpreted as the width + w = parse_imgsize_num(string, "width") + h = None + else: + w, h = string.split('x', 1) + w = parse_imgsize_num(w, "width") + h = parse_imgsize_num(h, "height") + if transposed: + w, h = h, w + if w is None and h is None: + raise argparse.ArgumentTypeError("at least one dimension must be " + "specified") + return w, h + + +def parse_colorspacearg(string): + for c in Colorspace: + if c.name == string: + return c + allowed = ", ".join([c.name for c in Colorspace]) + raise argparse.ArgumentTypeError("Unsupported colorspace: %s. Must be one " + "of: %s." 
% (string, allowed)) + + +def parse_borderarg(string): + if ':' in string: + h, v = string.split(':', 1) + if h == '': + raise argparse.ArgumentTypeError("missing value before colon") + if v == '': + raise argparse.ArgumentTypeError("missing value after colon") + else: + if string == '': + raise argparse.ArgumentTypeError("border option cannot be empty") + h, v = string, string + h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border") + if h is None and v is None: + raise argparse.ArgumentTypeError("missing value") + return h, v + + +def input_images(path): + if path == '-': + # we slurp in all data from stdin because we need to seek in it later + result = sys.stdin.buffer.read() + if len(result) == 0: + raise argparse.ArgumentTypeError("\"%s\" is empty" % path) + else: + try: + if os.path.getsize(path) == 0: + raise argparse.ArgumentTypeError("\"%s\" is empty" % path) + # test-read a byte from it so that we can abort early in case + # we cannot read data from the file + with open(path, "rb") as im: + im.read(1) + except IsADirectoryError: + raise argparse.ArgumentTypeError( + "\"%s\" is a directory" % path) + except PermissionError: + raise argparse.ArgumentTypeError( + "\"%s\" permission denied" % path) + except FileNotFoundError: + raise argparse.ArgumentTypeError( + "\"%s\" does not exist" % path) + result = path + return result + + +def parse_fitarg(string): + for m in FitMode: + if m.name == string.lower(): + return m + raise argparse.ArgumentTypeError("unknown fit mode: %s" % string) + + +def parse_panes(string): + for m in PageMode: + if m.name == string.lower(): + return m + allowed = ", ".join([m.name for m in PageMode]) + raise argparse.ArgumentTypeError("Unsupported page mode: %s. Must be one " + "of: %s." 
% (string, allowed)) + + +def parse_magnification(string): + for m in Magnification: + if m.name == string.lower(): + return m + try: + return float(string) + except ValueError: + pass + allowed = ", ".join([m.name for m in Magnification]) + raise argparse.ArgumentTypeError("Unsupported magnification: %s. Must be " + "a floating point number or one of: %s." % + (string, allowed)) + + +def parse_layout(string): + for l in PageLayout: + if l.name == string.lower(): + return l + allowed = ", ".join([l.name for l in PageLayout]) + raise argparse.ArgumentTypeError("Unsupported page layout: %s. Must be " + "one of: %s." % (string, allowed)) + + +def valid_date(string): + # first try parsing in ISO8601 format + try: + return datetime.strptime(string, "%Y-%m-%d") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S") + except ValueError: + pass + # then try dateutil + try: + from dateutil import parser + except ImportError: + pass + else: + try: + return parser.parse(string) + except TypeError: + pass + # as a last resort, try the local date utility + try: + import subprocess + except ImportError: + pass + else: + try: + utime = subprocess.check_output(["date", "--date", string, "+%s"]) + except subprocess.CalledProcessError: + pass + else: + return datetime.utcfromtimestamp(int(utime)) + raise argparse.ArgumentTypeError("cannot parse date: %s" % string) + + +def main(): + rendered_papersizes = "" + for k, v in sorted(papersizes.items()): + rendered_papersizes += " %-8s %s\n" % (papernames[k], v) + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description='''\ +Losslessly convert raster images to PDF without re-encoding JPEG and JPEG2000 +images. This leads to a lossless conversion of JPEG and JPEG2000 images with +the only added file size coming from the PDF container itself. 
+ +Other raster graphics formats are losslessly stored in a zip/flate encoding of +their RGB representation. This might increase file size and does not store +transparency. There is nothing that can be done about that until the PDF format +allows embedding other image formats like PNG. Thus, img2pdf is primarily +useful to convert JPEG and JPEG2000 images to PDF. + +The output is sent to standard output so that it can be redirected into a file +or to another program as part of a shell pipe. To directly write the output +into a file, use the -o or --output option. +''', + epilog='''\ +Colorspace + + Currently, the colorspace must be forced for JPEG 2000 images that are not in + the RGB colorspace. Available colorspace options are based on Python Imaging + Library (PIL) short handles. + + RGB RGB color + L Grayscale + 1 Black and white (internally converted to grayscale) + CMYK CMYK color + CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe) + +Paper sizes + + You can specify the short hand paper size names shown in the first column in + the table below as arguments to the --pagesize and --imgsize options. The + width and height they are mapping to is shown in the second column. Giving + the value in the second column has the same effect as giving the short hand + in the first column. Appending ^T (a caret/circumflex followed by the letter + T) turns the paper size from portrait into landscape. The postfix thus + symbolizes the transpose. The values are case insensitive. + +%s + +Fit options + + The img2pdf options for the --fit argument are shown in the first column in + the table below. The function of these options can be mapped to the geometry + operators of imagemagick. For users who are familiar with imagemagick, the + corresponding operator is shown in the second column. The third column shows + whether or not the aspect ratio is preserved for that option (same as in + imagemagick). 
Just like imagemagick, img2pdf tries hard to preserve the + aspect ratio, so if the --fit argument is not given, then the default is + "into" which corresponds to the absence of any operator in imagemagick. + The value of the --fit option is case insensitive. + + into | | Y | The default. Width and height values specify maximum + | | | values. + ---------+---+---+---------------------------------------------------------- + fill | ^ | Y | Width and height values specify the minimum values. + ---------+---+---+---------------------------------------------------------- + exact | ! | N | Width and height emphatically given. + ---------+---+---+---------------------------------------------------------- + shrink | > | Y | Shrinks an image with dimensions larger than the given + | | | ones (and otherwise behaves like "into"). + ---------+---+---+---------------------------------------------------------- + enlarge | < | Y | Enlarges an image with dimensions smaller than the given + | | | ones (and otherwise behaves like "into"). + +Examples + + Lines starting with a dollar sign denote commands you can enter into your + terminal. The dollar sign signifies your command prompt. It is not part of + the command you type. + + Convert two scans in JPEG format to a PDF document. + + $ img2pdf --output out.pdf page1.jpg page2.jpg + + Convert a directory of JPEG images into a PDF with printable A4 pages in + landscape mode. On each page, the photo takes the maximum amount of space + while preserving its aspect ratio and a print border of 2 cm on the top and + bottom and 2.5 cm on the left and right hand side. + + $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg + + On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the + original image size if the image is smaller than that. 
+ + $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg + + Prepare a directory of photos to be printed borderless on photo paper with a + 3:2 aspect ratio and rotate each page so that its orientation is the same as + the input image. + + $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg + + Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf + cannot read it from the JPEG2000 file automatically. + + $ img2pdf --output out.pdf --colorspace L input.jp2 + +Argument parsing + + Argument long options can be abbreviated to a prefix if the abbreviation is + anambiguous. That is, the prefix must match a unique option. + + Beware of your shell interpreting argument values as special characters (like + the semicolon in the CMYK;I colorspace option). If in doubt, put the argument + values in single quotes. + + If you want an argument value to start with one or more minus characters, you + must use the long option name and join them with an equal sign like so: + + $ img2pdf --author=--test-- + + If your input file name starts with one or more minus characters, either + separate the input files from the other arguments by two minus signs: + + $ img2pdf -- --my-file-starts-with-two-minuses.jpg + + Or be more explicit about its relative path by prepending a ./: + + $ img2pdf ./--my-file-starts-with-two-minuses.jpg + + The order of non-positional arguments (all arguments other than the input + images) does not matter. +''' % rendered_papersizes) + + parser.add_argument( + 'images', metavar='infile', type=input_images, nargs='*', + help='Specifies the input file(s) in any format that can be read by ' + 'the Python Imaging Library (PIL). If no input images are given, then ' + 'a single image is read from standard input. The special filename "-" ' + 'can be used once to read an image from standard input. 
To read a ' + 'file in the current directory with the filename "-", pass it to ' + 'img2pdf by explicitly stating its relative path like "./-".') + parser.add_argument( + '-v', '--verbose', action="store_true", + help='Makes the program operate in verbose mode, printing messages on ' + 'standard error.') + parser.add_argument( + '-V', '--version', action='version', version='%(prog)s '+__version__, + help="Prints version information and exits.") + + outargs = parser.add_argument_group( + title='General output arguments', + description='') + + outargs.add_argument( + '-o', '--output', metavar='out', type=argparse.FileType('wb'), + default=sys.stdout.buffer, + help='Makes the program output to a file instead of standard output.') + outargs.add_argument( + '-C', '--colorspace', metavar='colorspace', type=parse_colorspacearg, + help=''' +Forces the PIL colorspace. See the epilogue for a list of possible values. +Usually the PDF colorspace would be derived from the color space of the input +image. This option overwrites the automatically detected colorspace from the +input image and thus forces a certain colorspace in the output PDF /ColorSpace +property. This is useful for JPEG 2000 images with a different colorspace than +RGB.''') + + outargs.add_argument( + '-D', '--nodate', action="store_true", + help='Suppresses timestamps in the output and thus makes the output ' + 'deterministic between individual runs. You can also manually ' + 'set a date using the --moddate and --creationdate options.') + + outargs.add_argument( + "--without-pdfrw", action="store_true", + help="By default, img2pdf uses the pdfrw library to create the output " + "PDF if pdfrw is available. If you want to use the internal PDF " + "generator of img2pdf even if pdfrw is present, then pass this " + "option. 
This can be useful if you want to have unicode metadata " + "values which pdfrw does not yet support (See " + "https://github.com/pmaupin/pdfrw/issues/39) or if you want the " + "PDF code to be more human readable.") + + outargs.add_argument( + "--first-frame-only", action="store_true", + help="By default, img2pdf will convert multi-frame images like " + "multi-page TIFF or animated GIF images to one page per frame. " + "This option will only let the first frame of every multi-frame " + "input image be converted into a page in the resulting PDF." + ) + + sizeargs = parser.add_argument_group( + title='Image and page size and layout arguments', + description='''\ + +Every input image will be placed on its own page. The image size is controlled +by the dpi value of the input image or, if unset or missing, the default dpi of +%.2f. By default, each page will have the same size as the image it shows. +Thus, there will be no visible border between the image and the page border by +default. If image size and page size are made different from each other by the +options in this section, the image will always be centered in both dimensions. + +The image size and page size can be explicitly set using the --imgsize and +--pagesize options, respectively. If either dimension of the image size is +specified but the same dimension of the page size is not, then the latter will +be derived from the former using an optional minimal distance between the image +and the page border (given by the --border option) and/or a certain fitting +strategy (given by the --fit option). The converse happens if a dimension of +the page size is set but the same dimension of the image size is not. + +Any length value in below options is represented by the meta variable L which +is a floating point value with an optional unit appended (without a space +between them). The default unit is pt (1/72 inch, the PDF unit) and other +allowed units are cm (centimeter), mm (millimeter), and in (inch). 
+ +Any size argument of the format LxL in the options below specifies the width +and height of a rectangle where the first L represents the width and the second +L represents the height with an optional unit following each value as described +above. Either width or height may be omitted. If the height is omitted, the +separating x can be omitted as well. Omitting the width requires to prefix the +height with the separating x. The missing dimension will be chosen so to not +change the image aspect ratio. Instead of giving the width and height +explicitly, you may also specify some (case-insensitive) common page sizes such +as letter and A4. See the epilogue at the bottom for a complete list of the +valid sizes. + +The --fit option scales to fit the image into a rectangle that is either +derived from the --imgsize option or otherwise from the --pagesize option. +If the --border option is given in addition to the --imgsize option while the +--pagesize option is not given, then the page size will be calculated from the +image size, respecting the border setting. If the --border option is given in +addition to the --pagesize option while the --imgsize option is not given, then +the image size will be calculated from the page size, respecting the border +setting. If the --border option is given while both the --pagesize and +--imgsize options are passed, then the --border option will be ignored. + +''' % default_dpi) + + sizeargs.add_argument( + '-S', '--pagesize', metavar='LxL', type=parse_pagesize_rectarg, + help=''' +Sets the size of the PDF pages. The short-option is the upper case S because +it is an mnemonic for being bigger than the image size.''') + + sizeargs.add_argument( + '-s', '--imgsize', metavar='LxL', type=parse_imgsize_rectarg, + help=''' +Sets the size of the images on the PDF pages. In addition, the unit dpi is +allowed which will set the image size as a value of dots per inch. 
Instead of +a unit, width and height values may also have a percentage sign appended, +indicating a resize of the image by that percentage. The short-option is the +lower case s because it is an mnemonic for being smaller than the page size. +''') + sizeargs.add_argument( + '-b', '--border', metavar='L[:L]', type=parse_borderarg, + help=''' +Specifies the minimal distance between the image border and the PDF page +border. This value Is overwritten by explicit values set by --pagesize or +--imgsize. The value will be used when calculating page dimensions from the +image dimensions or the other way round. One, or two length values can be given +as an argument, separated by a colon. One value specifies the minimal border on +all four sides. Two values specify the minimal border on the top/bottom and +left/right, respectively. It is not possible to specify asymmetric borders +because images will always be centered on the page. +''') + sizeargs.add_argument( + '-f', '--fit', metavar='FIT', type=parse_fitarg, + default=FitMode.into, help=''' + +If --imgsize is given, fits the image using these dimensions. Otherwise, fit +the image into the dimensions given by --pagesize. FIT is one of into, fill, +exact, shrink and enlarge. The default value is "into". See the epilogue at the +bottom for a description of the FIT options. + +''') + sizeargs.add_argument( + '-a', '--auto-orient', action="store_true", + help=''' +If both dimensions of the page are given via --pagesize, conditionally swaps +these dimensions such that the page orientation is the same as the orientation +of the input image. If the orientation of a page gets flipped, then so do the +values set via the --border option. 
+''') + + metaargs = parser.add_argument_group(title='Arguments setting metadata', + description='') + metaargs.add_argument( + '--title', metavar='title', type=str, + help='Sets the title metadata value') + metaargs.add_argument( + '--author', metavar='author', type=str, + help='Sets the author metadata value') + metaargs.add_argument( + '--creator', metavar='creator', type=str, + help='Sets the creator metadata value') + metaargs.add_argument( + '--producer', metavar='producer', type=str, + default="img2pdf " + __version__, + help='Sets the producer metadata value (default is: img2pdf)') + metaargs.add_argument( + '--creationdate', metavar='creationdate', type=valid_date, + help='Sets the UTC creation date metadata value in YYYY-MM-DD or ' + 'YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format ' + 'understood by python dateutil module or any format understood ' + 'by `date --date`') + metaargs.add_argument( + '--moddate', metavar='moddate', type=valid_date, + help='Sets the UTC modification date metadata value in YYYY-MM-DD ' + 'or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format ' + 'understood by python dateutil module or any format understood ' + 'by `date --date`') + metaargs.add_argument( + '--subject', metavar='subject', type=str, + help='Sets the subject metadata value') + metaargs.add_argument( + '--keywords', metavar='kw', type=str, nargs='+', + help='Sets the keywords metadata value (can be given multiple times)') + + viewerargs = parser.add_argument_group( + title='PDF viewer arguments', + description='PDF files can specify how they are meant to be ' + 'presented to the user by a PDF viewer') + + viewerargs.add_argument( + '--viewer-panes', metavar="PANES", type=parse_panes, + help='Instruct the PDF viewer which side panes to show. Valid values ' + 'are "outlines" and "thumbs". 
It is not possible to specify both ' + 'at the same time.') + viewerargs.add_argument( + '--viewer-initial-page', metavar="NUM", type=int, + help='Instead of showing the first page, instruct the PDF viewer to ' + 'show the given page instead. Page numbers start with 1.') + viewerargs.add_argument( + '--viewer-magnification', metavar="MAG", type=parse_magnification, + help='Instruct the PDF viewer to open the PDF with a certain zoom ' + 'level. Valid values are either a floating point number giving ' + 'the exact zoom level, "fit" (zoom to fit whole page), "fith" ' + '(zoom to fit page width) and "fitbh" (zoom to fit visible page ' + 'width).') + viewerargs.add_argument( + '--viewer-page-layout', metavar="LAYOUT", type=parse_layout, + help='Instruct the PDF viewer how to arrange the pages on the screen. ' + 'Valid values are "single" (display single pages), "onecolumn" ' + '(one continuous column), "twocolumnright" (two continuous ' + 'columns with odd number pages on the right) and "twocolumnleft" ' + '(two continuous columns with odd numbered pages on the left)') + viewerargs.add_argument( + '--viewer-fit-window', action="store_true", + help='Instruct the PDF viewer to resize the window to fit the page ' + 'size') + viewerargs.add_argument( + '--viewer-center-window', action="store_true", + help='Instruct the PDF viewer to center the PDF viewer window') + viewerargs.add_argument( + '--viewer-fullscreen', action="store_true", + help='Instruct the PDF viewer to open the PDF in fullscreen mode') + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border, + args.fit, args.auto_orient) + + # if no positional arguments were supplied, read a single image from + # standard input + if len(args.images) == 0: + logging.info("reading image from standard input") + try: + args.images = [sys.stdin.buffer.read()] + except KeyboardInterrupt: + exit(0) + + # with the number 
of pages being equal to the number of images, the + # value passed to --viewer-initial-page must be between 1 and that number + if args.viewer_initial_page is not None: + if args.viewer_initial_page < 1: + parser.print_usage(file=sys.stderr) + logging.error("%s: error: argument --viewer-initial-page: must be " + "greater than zero" % parser.prog) + exit(2) + if args.viewer_initial_page > len(args.images): + parser.print_usage(file=sys.stderr) + logging.error("%s: error: argument --viewer-initial-page: must be " + "less than or equal to the total number of pages" % + parser.prog) + exit(2) + + try: + convert( + *args.images, title=args.title, author=args.author, + creator=args.creator, producer=args.producer, + creationdate=args.creationdate, moddate=args.moddate, + subject=args.subject, keywords=args.keywords, + colorspace=args.colorspace, nodate=args.nodate, + layout_fun=layout_fun, viewer_panes=args.viewer_panes, + viewer_initial_page=args.viewer_initial_page, + viewer_magnification=args.viewer_magnification, + viewer_page_layout=args.viewer_page_layout, + viewer_fit_window=args.viewer_fit_window, + viewer_center_window=args.viewer_center_window, + viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not + args.without_pdfrw, outputstream=args.output, + first_frame_only=args.first_frame_only) + except Exception as e: + logging.error("error: " + str(e)) + if logging.getLogger().isEnabledFor(logging.DEBUG): + import traceback + traceback.print_exc(file=sys.stderr) + exit(1) + +if __name__ == '__main__': + main() diff --git a/src/jp2.py b/src/jp2.py new file mode 100644 index 0000000..7f61312 --- /dev/null +++ b/src/jp2.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python +# +# Copyright (C) 2013 Johannes 'josch' Schauer +# +# this module is heavily based upon jpylyzer which is +# KB / National Library of the Netherlands, Open Planets Foundation +# and released under the same license conditions +# +# This program is free software: you can redistribute it and/or modify +# it 
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import struct


def getBox(data, byteStart, noBytes):
    """Read the box that starts at offset byteStart of data.

    A box starts with a 4-byte big-endian length followed by a 4-byte type.
    A length of 1 means that the real length follows as an 8-byte integer;
    a length of 0 means that the box extends up to noBytes.

    Returns (boxLengthValue, boxType, byteEnd, boxContents) where byteEnd is
    the offset just past the box and boxContents excludes the box header.
    """
    boxLengthValue = struct.unpack(">I", data[byteStart:byteStart+4])[0]
    boxType = data[byteStart+4:byteStart+8]
    contentsStartOffset = 8
    if boxLengthValue == 1:
        # extended (64 bit) length field directly after the box type
        boxLengthValue = struct.unpack(">Q", data[byteStart+8:byteStart+16])[0]
        contentsStartOffset = 16
    if boxLengthValue == 0:
        # the box runs until the end of the enclosing data
        boxLengthValue = noBytes-byteStart
    byteEnd = byteStart + boxLengthValue
    boxContents = data[byteStart+contentsStartOffset:byteEnd]
    return (boxLengthValue, boxType, byteEnd, boxContents)


def parse_ihdr(data):
    """Return (width, height) from the contents of an 'ihdr' box.

    The height is stored in the first four bytes, the width in the next four.
    """
    height = struct.unpack(">I", data[0:4])[0]
    width = struct.unpack(">I", data[4:8])[0]
    return width, height


def parse_colr(data):
    """Return the colorspace name ("RGB" or "L") from a 'colr' box.

    Raises Exception if the box does not use the enumerated method (1) or
    if the enumerated colorspace is neither 16 nor 17.
    """
    meth = struct.unpack(">B", data[0:1])[0]
    if meth != 1:
        raise Exception("only enumerated color method supported")
    # the enumerated colorspace occupies exactly four bytes after the three
    # one-byte fields; slicing explicitly tolerates trailing bytes
    enumCS = struct.unpack(">I", data[3:7])[0]
    if enumCS == 16:
        return "RGB"
    elif enumCS == 17:
        return "L"
    else:
        raise Exception("only sRGB and greyscale color space is supported, "
                        "got %d" % enumCS)


def parse_resc(data):
    """Return (hdpi, vdpi) from the contents of a 'resc' box.

    The box stores, in this order, the vertical numerator and denominator,
    the horizontal numerator and denominator (16-bit unsigned each) and the
    vertical and horizontal base-10 exponents (8-bit signed each). The
    resulting resolution num/den * 10**exp is in grid points per meter and
    is converted to dots per inch here.
    """
    vnum, vden, hnum, hden, vexp, hexp = struct.unpack(">HHHHbb", data[:10])
    # 1 inch = 2.54 cm = 0.0254 m, hence dots/inch = dots/m * 2.54 / 100
    hdpi = (hnum/hden) * (10**hexp) * 2.54 / 100
    vdpi = (vnum/vden) * (10**vexp) * 2.54 / 100
    return hdpi, vdpi


def parse_res(data):
    """Return (hdpi, vdpi) from the contents of a 'res ' superbox.

    The sub-boxes are scanned for the first 'resc' box. If there is none,
    (None, None) is returned.
    """
    hdpi, vdpi = None, None
    noBytes = len(data)
    byteStart = 0
    boxLengthValue = 1  # dummy value for while loop condition
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = \
            getBox(data, byteStart, noBytes)
        if boxType == b'resc':
            hdpi, vdpi = parse_resc(boxContents)
            break
        # advance to the next sub-box; without this the loop never
        # terminates when the first sub-box is not 'resc'
        byteStart = byteEnd
    return hdpi, vdpi


def parse_jp2h(data):
    """Return (width, height, colorspace, hdpi, vdpi) from a 'jp2h' superbox.

    Values whose sub-box ('ihdr', 'colr', 'res ') is absent are None.
    """
    width, height, colorspace, hdpi, vdpi = None, None, None, None, None
    noBytes = len(data)
    byteStart = 0
    boxLengthValue = 1  # dummy value for while loop condition
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = \
            getBox(data, byteStart, noBytes)
        if boxType == b'ihdr':
            width, height = parse_ihdr(boxContents)
        elif boxType == b'colr':
            colorspace = parse_colr(boxContents)
        elif boxType == b'res ':
            hdpi, vdpi = parse_res(boxContents)
        byteStart = byteEnd
    return (width, height, colorspace, hdpi, vdpi)


def parsejp2(data):
    """Return (width, height, colorspace, hdpi, vdpi) of a JP2 file.

    data is the complete file content as bytes. Only the first 'jp2h' box is
    evaluated. hdpi and vdpi are None if the file carries no resolution.

    Raises Exception if width, height or colorspace cannot be determined.
    """
    noBytes = len(data)
    byteStart = 0
    boxLengthValue = 1  # dummy value for while loop condition
    width, height, colorspace, hdpi, vdpi = None, None, None, None, None
    while byteStart < noBytes and boxLengthValue != 0:
        boxLengthValue, boxType, byteEnd, boxContents = \
            getBox(data, byteStart, noBytes)
        if boxType == b'jp2h':
            width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents)
            break
        byteStart = byteEnd
    if not width:
        raise Exception("no width in jp2 header")
    if not height:
        raise Exception("no height in jp2 header")
    if not colorspace:
        raise Exception("no colorspace in jp2 header")
    # retrieving the dpi is optional so we do not error out if not present
    return (width, height, colorspace, hdpi, vdpi)

if __name__ == "__main__":
    import sys
    # the parser works on bytes, so the file must be opened in binary mode
    with open(sys.argv[1], "rb") as f:
        width, height, colorspace, hdpi, vdpi = parsejp2(f.read())
    sys.stdout.write("width = %d\n" % width)
    sys.stdout.write("height = %d\n" % height)
    sys.stdout.write("colorspace = %s\n" % colorspace)

# diff --git a/src/tests/__init__.py b/src/tests/__init__.py
# new file mode 100644
# index 0000000..b668054
# --- /dev/null
# +++ b/src/tests/__init__.py
# @@ -0,0 +1,557 @@
import unittest

+import os +import img2pdf +import zlib +from PIL import Image + +HERE = os.path.dirname(__file__) + +# convert +set date:create +set date:modify -define png:exclude-chunk=time + +# we define some variables so that the table below can be narrower +psl = (972, 504) # --pagesize landscape +psp = (504, 972) # --pagesize portrait +isl = (756, 324) # --imgsize landscape +isp = (324, 756) # --imgsize portrait +border = (162, 270) # --border +# there is no need to have test cases with the same images with inverted +# orientation (landscape/portrait) because --pagesize and --imgsize are +# already inverted +im1 = (864, 288) # imgpx #1 => 648x216 +im2 = (1152, 576) # imgpx #2 => 864x432 +# shortcuts for fit modes +f_into = img2pdf.FitMode.into +f_fill = img2pdf.FitMode.fill +f_exact = img2pdf.FitMode.exact +f_shrink = img2pdf.FitMode.shrink +f_enlarge = img2pdf.FitMode.enlarge +layout_test_cases = [ + # psp=972x504, psl=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit + (None, None, None, f_into, 0, (648, 216), (648, 216), # 000 + (864, 432), (864, 432)), + (None, None, None, f_into, 1, (648, 216), (648, 216), # 001 + (864, 432), (864, 432)), + (None, None, None, f_fill, 0, (648, 216), (648, 216), # 002 + (864, 432), (864, 432)), + (None, None, None, f_fill, 1, (648, 216), (648, 216), # 003 + (864, 432), (864, 432)), + (None, None, None, f_exact, 0, (648, 216), (648, 216), # 004 + (864, 432), (864, 432)), + (None, None, None, f_exact, 1, (648, 216), (648, 216), # 005 + (864, 432), (864, 432)), + (None, None, None, f_shrink, 0, (648, 216), (648, 216), # 006 + (864, 432), (864, 432)), + (None, None, None, f_shrink, 1, (648, 216), (648, 216), # 007 + (864, 432), (864, 432)), + (None, None, None, f_enlarge, 0, (648, 216), (648, 216), # 008 + (864, 432), (864, 432)), + (None, None, None, f_enlarge, 1, (648, 216), (648, 216), # 009 + (864, 432), (864, 432)), + (None, None, border, f_into, 0, (1188, 540), (648, 216), # 
010 + (1404, 756), (864, 432)), + (None, None, border, f_into, 1, (1188, 540), (648, 216), # 011 + (1404, 756), (864, 432)), + (None, None, border, f_fill, 0, (1188, 540), (648, 216), # 012 + (1404, 756), (864, 432)), + (None, None, border, f_fill, 1, (1188, 540), (648, 216), # 013 + (1404, 756), (864, 432)), + (None, None, border, f_exact, 0, (1188, 540), (648, 216), # 014 + (1404, 756), (864, 432)), + (None, None, border, f_exact, 1, (1188, 540), (648, 216), # 015 + (1404, 756), (864, 432)), + (None, None, border, f_shrink, 0, (1188, 540), (648, 216), # 016 + (1404, 756), (864, 432)), + (None, None, border, f_shrink, 1, (1188, 540), (648, 216), # 017 + (1404, 756), (864, 432)), + (None, None, border, f_enlarge, 0, (1188, 540), (648, 216), # 018 + (1404, 756), (864, 432)), + (None, None, border, f_enlarge, 1, (1188, 540), (648, 216), # 019 + (1404, 756), (864, 432)), + (None, isp, None, f_into, 0, (324, 108), (324, 108), # 020 + (324, 162), (324, 162)), + (None, isp, None, f_into, 1, (324, 108), (324, 108), # 021 + (324, 162), (324, 162)), + (None, isp, None, f_fill, 0, (2268, 756), (2268, 756), # 022 + (1512, 756), (1512, 756)), + (None, isp, None, f_fill, 1, (2268, 756), (2268, 756), # 023 + (1512, 756), (1512, 756)), + (None, isp, None, f_exact, 0, (324, 756), (324, 756), # 024 + (324, 756), (324, 756)), + (None, isp, None, f_exact, 1, (324, 756), (324, 756), # 025 + (324, 756), (324, 756)), + (None, isp, None, f_shrink, 0, (324, 108), (324, 108), # 026 + (324, 162), (324, 162)), + (None, isp, None, f_shrink, 1, (324, 108), (324, 108), # 027 + (324, 162), (324, 162)), + (None, isp, None, f_enlarge, 0, (648, 216), (648, 216), # 028 + (864, 432), (864, 432)), + (None, isp, None, f_enlarge, 1, (648, 216), (648, 216), # 029 + (864, 432), (864, 432)), + (None, isp, border, f_into, 0, (864, 432), (324, 108), # 030 + (864, 486), (324, 162)), + (None, isp, border, f_into, 1, (864, 432), (324, 108), # 031 + (864, 486), (324, 162)), + (None, isp, border, f_fill, 0, 
(2808, 1080), (2268, 756), # 032 + (2052, 1080), (1512, 756)), + (None, isp, border, f_fill, 1, (2808, 1080), (2268, 756), # 033 + (2052, 1080), (1512, 756)), + (None, isp, border, f_exact, 0, (864, 1080), (324, 756), # 034 + (864, 1080), (324, 756)), + (None, isp, border, f_exact, 1, (864, 1080), (324, 756), # 035 + (864, 1080), (324, 756)), + (None, isp, border, f_shrink, 0, (864, 432), (324, 108), # 036 + (864, 486), (324, 162)), + (None, isp, border, f_shrink, 1, (864, 432), (324, 108), # 037 + (864, 486), (324, 162)), + (None, isp, border, f_enlarge, 0, (1188, 540), (648, 216), # 038 + (1404, 756), (864, 432)), + (None, isp, border, f_enlarge, 1, (1188, 540), (648, 216), # 039 + (1404, 756), (864, 432)), + (None, isl, None, f_into, 0, (756, 252), (756, 252), # 040 + (648, 324), (648, 324)), + (None, isl, None, f_into, 1, (756, 252), (756, 252), # 041 + (648, 324), (648, 324)), + (None, isl, None, f_fill, 0, (972, 324), (972, 324), # 042 + (756, 378), (756, 378)), + (None, isl, None, f_fill, 1, (972, 324), (972, 324), # 043 + (756, 378), (756, 378)), + (None, isl, None, f_exact, 0, (756, 324), (756, 324), # 044 + (756, 324), (756, 324)), + (None, isl, None, f_exact, 1, (756, 324), (756, 324), # 045 + (756, 324), (756, 324)), + (None, isl, None, f_shrink, 0, (648, 216), (648, 216), # 046 + (648, 324), (648, 324)), + (None, isl, None, f_shrink, 1, (648, 216), (648, 216), # 047 + (648, 324), (648, 324)), + (None, isl, None, f_enlarge, 0, (756, 252), (756, 252), # 048 + (864, 432), (864, 432)), + (None, isl, None, f_enlarge, 1, (756, 252), (756, 252), # 049 + (864, 432), (864, 432)), + # psp=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (None, isl, border, f_into, 0, (1296, 576), (756, 252), # 050 + (1188, 648), (648, 324)), + (None, isl, border, f_into, 1, (1296, 576), (756, 252), # 051 + (1188, 648), (648, 324)), + (None, isl, border, f_fill, 0, (1512, 648), (972, 324), # 052 + 
(1296, 702), (756, 378)), + (None, isl, border, f_fill, 1, (1512, 648), (972, 324), # 053 + (1296, 702), (756, 378)), + (None, isl, border, f_exact, 0, (1296, 648), (756, 324), # 054 + (1296, 648), (756, 324)), + (None, isl, border, f_exact, 1, (1296, 648), (756, 324), # 055 + (1296, 648), (756, 324)), + (None, isl, border, f_shrink, 0, (1188, 540), (648, 216), # 056 + (1188, 648), (648, 324)), + (None, isl, border, f_shrink, 1, (1188, 540), (648, 216), # 057 + (1188, 648), (648, 324)), + (None, isl, border, f_enlarge, 0, (1296, 576), (756, 252), # 058 + (1404, 756), (864, 432)), + (None, isl, border, f_enlarge, 1, (1296, 576), (756, 252), # 059 + (1404, 756), (864, 432)), + (psp, None, None, f_into, 0, (504, 972), (504, 168), # 060 + (504, 972), (504, 252)), + (psp, None, None, f_into, 1, (972, 504), (972, 324), # 061 + (972, 504), (972, 486)), + (psp, None, None, f_fill, 0, (504, 972), (2916, 972), # 062 + (504, 972), (1944, 972)), + (psp, None, None, f_fill, 1, (972, 504), (1512, 504), # 063 + (972, 504), (1008, 504)), + (psp, None, None, f_exact, 0, (504, 972), (504, 972), # 064 + (504, 972), (504, 972)), + (psp, None, None, f_exact, 1, (972, 504), (972, 504), # 065 + (972, 504), (972, 504)), + (psp, None, None, f_shrink, 0, (504, 972), (504, 168), # 066 + (504, 972), (504, 252)), + (psp, None, None, f_shrink, 1, (972, 504), (648, 216), # 067 + (972, 504), (864, 432)), + (psp, None, None, f_enlarge, 0, (504, 972), (648, 216), # 068 + (504, 972), (864, 432)), + (psp, None, None, f_enlarge, 1, (972, 504), (972, 324), # 069 + (972, 504), (972, 486)), + (psp, None, border, f_into, 0, None, None, None, None), # 070 + (psp, None, border, f_into, 1, None, None, None, None), # 071 + (psp, None, border, f_fill, 0, (504, 972), (1944, 648), # 072 + (504, 972), (1296, 648)), + (psp, None, border, f_fill, 1, (972, 504), (648, 216), # 073 + (972, 504), (648, 324)), + (psp, None, border, f_exact, 0, None, None, None, None), # 074 + (psp, None, border, f_exact, 1, None, None, 
None, None), # 075 + (psp, None, border, f_shrink, 0, None, None, None, None), # 076 + (psp, None, border, f_shrink, 1, None, None, None, None), # 077 + (psp, None, border, f_enlarge, 0, (504, 972), (648, 216), # 078 + (504, 972), (864, 432)), + (psp, None, border, f_enlarge, 1, (972, 504), (648, 216), # 079 + (972, 504), (864, 432)), + (psp, isp, None, f_into, 0, (504, 972), (324, 108), # 080 + (504, 972), (324, 162)), + (psp, isp, None, f_into, 1, (972, 504), (324, 108), # 081 + (972, 504), (324, 162)), + (psp, isp, None, f_fill, 0, (504, 972), (2268, 756), # 082 + (504, 972), (1512, 756)), + (psp, isp, None, f_fill, 1, (972, 504), (2268, 756), # 083 + (972, 504), (1512, 756)), + (psp, isp, None, f_exact, 0, (504, 972), (324, 756), # 084 + (504, 972), (324, 756)), + (psp, isp, None, f_exact, 1, (972, 504), (324, 756), # 085 + (972, 504), (324, 756)), + (psp, isp, None, f_shrink, 0, (504, 972), (324, 108), # 086 + (504, 972), (324, 162)), + (psp, isp, None, f_shrink, 1, (972, 504), (324, 108), # 087 + (972, 504), (324, 162)), + (psp, isp, None, f_enlarge, 0, (504, 972), (648, 216), # 088 + (504, 972), (864, 432)), + (psp, isp, None, f_enlarge, 1, (972, 504), (648, 216), # 089 + (972, 504), (864, 432)), + (psp, isp, border, f_into, 0, (504, 972), (324, 108), # 090 + (504, 972), (324, 162)), + (psp, isp, border, f_into, 1, (972, 504), (324, 108), # 091 + (972, 504), (324, 162)), + (psp, isp, border, f_fill, 0, (504, 972), (2268, 756), # 092 + (504, 972), (1512, 756)), + (psp, isp, border, f_fill, 1, (972, 504), (2268, 756), # 093 + (972, 504), (1512, 756)), + (psp, isp, border, f_exact, 0, (504, 972), (324, 756), # 094 + (504, 972), (324, 756)), + (psp, isp, border, f_exact, 1, (972, 504), (324, 756), # 095 + (972, 504), (324, 756)), + (psp, isp, border, f_shrink, 0, (504, 972), (324, 108), # 096 + (504, 972), (324, 162)), + (psp, isp, border, f_shrink, 1, (972, 504), (324, 108), # 097 + (972, 504), (324, 162)), + (psp, isp, border, f_enlarge, 0, (504, 972), (648, 
216), # 098 + (504, 972), (864, 432)), + (psp, isp, border, f_enlarge, 1, (972, 504), (648, 216), # 099 + (972, 504), (864, 432)), + # psp=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (psp, isl, None, f_into, 0, (504, 972), (756, 252), # 100 + (504, 972), (648, 324)), + (psp, isl, None, f_into, 1, (972, 504), (756, 252), # 101 + (972, 504), (648, 324)), + (psp, isl, None, f_fill, 0, (504, 972), (972, 324), # 102 + (504, 972), (756, 378)), + (psp, isl, None, f_fill, 1, (972, 504), (972, 324), # 103 + (972, 504), (756, 378)), + (psp, isl, None, f_exact, 0, (504, 972), (756, 324), # 104 + (504, 972), (756, 324)), + (psp, isl, None, f_exact, 1, (972, 504), (756, 324), # 105 + (972, 504), (756, 324)), + (psp, isl, None, f_shrink, 0, (504, 972), (648, 216), # 106 + (504, 972), (648, 324)), + (psp, isl, None, f_shrink, 1, (972, 504), (648, 216), # 107 + (972, 504), (648, 324)), + (psp, isl, None, f_enlarge, 0, (504, 972), (756, 252), # 108 + (504, 972), (864, 432)), + (psp, isl, None, f_enlarge, 1, (972, 504), (756, 252), # 109 + (972, 504), (864, 432)), + (psp, isl, border, f_into, 0, (504, 972), (756, 252), # 110 + (504, 972), (648, 324)), + (psp, isl, border, f_into, 1, (972, 504), (756, 252), # 111 + (972, 504), (648, 324)), + (psp, isl, border, f_fill, 0, (504, 972), (972, 324), # 112 + (504, 972), (756, 378)), + (psp, isl, border, f_fill, 1, (972, 504), (972, 324), # 113 + (972, 504), (756, 378)), + (psp, isl, border, f_exact, 0, (504, 972), (756, 324), # 114 + (504, 972), (756, 324)), + (psp, isl, border, f_exact, 1, (972, 504), (756, 324), # 115 + (972, 504), (756, 324)), + (psp, isl, border, f_shrink, 0, (504, 972), (648, 216), # 116 + (504, 972), (648, 324)), + (psp, isl, border, f_shrink, 1, (972, 504), (648, 216), # 117 + (972, 504), (648, 324)), + (psp, isl, border, f_enlarge, 0, (504, 972), (756, 252), # 118 + (504, 972), (864, 432)), + (psp, isl, border, f_enlarge, 1, 
(972, 504), (756, 252), # 119 + (972, 504), (864, 432)), + (psl, None, None, f_into, 0, (972, 504), (972, 324), # 120 + (972, 504), (972, 486)), + (psl, None, None, f_into, 1, (972, 504), (972, 324), # 121 + (972, 504), (972, 486)), + (psl, None, None, f_fill, 0, (972, 504), (1512, 504), # 122 + (972, 504), (1008, 504)), + (psl, None, None, f_fill, 1, (972, 504), (1512, 504), # 123 + (972, 504), (1008, 504)), + (psl, None, None, f_exact, 0, (972, 504), (972, 504), # 124 + (972, 504), (972, 504)), + (psl, None, None, f_exact, 1, (972, 504), (972, 504), # 125 + (972, 504), (972, 504)), + (psl, None, None, f_shrink, 0, (972, 504), (648, 216), # 126 + (972, 504), (864, 432)), + (psl, None, None, f_shrink, 1, (972, 504), (648, 216), # 127 + (972, 504), (864, 432)), + (psl, None, None, f_enlarge, 0, (972, 504), (972, 324), # 128 + (972, 504), (972, 486)), + (psl, None, None, f_enlarge, 1, (972, 504), (972, 324), # 129 + (972, 504), (972, 486)), + (psl, None, border, f_into, 0, (972, 504), (432, 144), # 130 + (972, 504), (360, 180)), + (psl, None, border, f_into, 1, (972, 504), (432, 144), # 131 + (972, 504), (360, 180)), + (psl, None, border, f_fill, 0, (972, 504), (540, 180), # 132 + (972, 504), (432, 216)), + (psl, None, border, f_fill, 1, (972, 504), (540, 180), # 133 + (972, 504), (432, 216)), + (psl, None, border, f_exact, 0, (972, 504), (432, 180), # 134 + (972, 504), (432, 180)), + (psl, None, border, f_exact, 1, (972, 504), (432, 180), # 135 + (972, 504), (432, 180)), + (psl, None, border, f_shrink, 0, (972, 504), (432, 144), # 136 + (972, 504), (360, 180)), + (psl, None, border, f_shrink, 1, (972, 504), (432, 144), # 137 + (972, 504), (360, 180)), + (psl, None, border, f_enlarge, 0, (972, 504), (648, 216), # 138 + (972, 504), (864, 432)), + (psl, None, border, f_enlarge, 1, (972, 504), (648, 216), # 139 + (972, 504), (864, 432)), + (psl, isp, None, f_into, 0, (972, 504), (324, 108), # 140 + (972, 504), (324, 162)), + (psl, isp, None, f_into, 1, (972, 504), (324, 
108), # 141 + (972, 504), (324, 162)), + (psl, isp, None, f_fill, 0, (972, 504), (2268, 756), # 142 + (972, 504), (1512, 756)), + (psl, isp, None, f_fill, 1, (972, 504), (2268, 756), # 143 + (972, 504), (1512, 756)), + (psl, isp, None, f_exact, 0, (972, 504), (324, 756), # 144 + (972, 504), (324, 756)), + (psl, isp, None, f_exact, 1, (972, 504), (324, 756), # 145 + (972, 504), (324, 756)), + (psl, isp, None, f_shrink, 0, (972, 504), (324, 108), # 146 + (972, 504), (324, 162)), + (psl, isp, None, f_shrink, 1, (972, 504), (324, 108), # 147 + (972, 504), (324, 162)), + (psl, isp, None, f_enlarge, 0, (972, 504), (648, 216), # 148 + (972, 504), (864, 432)), + (psl, isp, None, f_enlarge, 1, (972, 504), (648, 216), # 149 + (972, 504), (864, 432)), + # psp=972x504, psl=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (psl, isp, border, f_into, 0, (972, 504), (324, 108), # 150 + (972, 504), (324, 162)), + (psl, isp, border, f_into, 1, (972, 504), (324, 108), # 151 + (972, 504), (324, 162)), + (psl, isp, border, f_fill, 0, (972, 504), (2268, 756), # 152 + (972, 504), (1512, 756)), + (psl, isp, border, f_fill, 1, (972, 504), (2268, 756), # 153 + (972, 504), (1512, 756)), + (psl, isp, border, f_exact, 0, (972, 504), (324, 756), # 154 + (972, 504), (324, 756)), + (psl, isp, border, f_exact, 1, (972, 504), (324, 756), # 155 + (972, 504), (324, 756)), + (psl, isp, border, f_shrink, 0, (972, 504), (324, 108), # 156 + (972, 504), (324, 162)), + (psl, isp, border, f_shrink, 1, (972, 504), (324, 108), # 157 + (972, 504), (324, 162)), + (psl, isp, border, f_enlarge, 0, (972, 504), (648, 216), # 158 + (972, 504), (864, 432)), + (psl, isp, border, f_enlarge, 1, (972, 504), (648, 216), # 159 + (972, 504), (864, 432)), + (psl, isl, None, f_into, 0, (972, 504), (756, 252), # 160 + (972, 504), (648, 324)), + (psl, isl, None, f_into, 1, (972, 504), (756, 252), # 161 + (972, 504), (648, 324)), + (psl, isl, None, f_fill, 0, 
(972, 504), (972, 324), # 162 + (972, 504), (756, 378)), + (psl, isl, None, f_fill, 1, (972, 504), (972, 324), # 163 + (972, 504), (756, 378)), + (psl, isl, None, f_exact, 0, (972, 504), (756, 324), # 164 + (972, 504), (756, 324)), + (psl, isl, None, f_exact, 1, (972, 504), (756, 324), # 165 + (972, 504), (756, 324)), + (psl, isl, None, f_shrink, 0, (972, 504), (648, 216), # 166 + (972, 504), (648, 324)), + (psl, isl, None, f_shrink, 1, (972, 504), (648, 216), # 167 + (972, 504), (648, 324)), + (psl, isl, None, f_enlarge, 0, (972, 504), (756, 252), # 168 + (972, 504), (864, 432)), + (psl, isl, None, f_enlarge, 1, (972, 504), (756, 252), # 169 + (972, 504), (864, 432)), + (psl, isl, border, f_into, 0, (972, 504), (756, 252), # 170 + (972, 504), (648, 324)), + (psl, isl, border, f_into, 1, (972, 504), (756, 252), # 171 + (972, 504), (648, 324)), + (psl, isl, border, f_fill, 0, (972, 504), (972, 324), # 172 + (972, 504), (756, 378)), + (psl, isl, border, f_fill, 1, (972, 504), (972, 324), # 173 + (972, 504), (756, 378)), + (psl, isl, border, f_exact, 0, (972, 504), (756, 324), # 174 + (972, 504), (756, 324)), + (psl, isl, border, f_exact, 1, (972, 504), (756, 324), # 175 + (972, 504), (756, 324)), + (psl, isl, border, f_shrink, 0, (972, 504), (648, 216), # 176 + (972, 504), (648, 324)), + (psl, isl, border, f_shrink, 1, (972, 504), (648, 216), # 177 + (972, 504), (648, 324)), + (psl, isl, border, f_enlarge, 0, (972, 504), (756, 252), # 178 + (972, 504), (864, 432)), + (psl, isl, border, f_enlarge, 1, (972, 504), (756, 252), # 179 + (972, 504), (864, 432)), +] + + +def test_suite(): + class TestImg2Pdf(unittest.TestCase): + pass + + for i, (psopt, isopt, border, fit, ao, pspdf1, ispdf1, + pspdf2, ispdf2) in enumerate(layout_test_cases): + if isopt is not None: + isopt = ((img2pdf.ImgSize.abs, isopt[0]), + (img2pdf.ImgSize.abs, isopt[1])) + + def layout_handler( + self, psopt, isopt, border, fit, ao, pspdf, ispdf, im): + layout_fun = img2pdf.get_layout_fun(psopt, isopt, 
border, fit, ao) + try: + pwpdf, phpdf, iwpdf, ihpdf = \ + layout_fun(im[0], im[1], (img2pdf.default_dpi, + img2pdf.default_dpi)) + self.assertEqual((pwpdf, phpdf), pspdf) + self.assertEqual((iwpdf, ihpdf), ispdf) + except img2pdf.NegativeDimensionError: + self.assertEqual(None, pspdf) + self.assertEqual(None, ispdf) + + def layout_handler_im1(self, psopt=psopt, isopt=isopt, border=border, + fit=fit, ao=ao, pspdf=pspdf1, ispdf=ispdf1): + layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf, + im1) + setattr(TestImg2Pdf, "test_layout_%03d_im1" % i, layout_handler_im1) + + def layout_handler_im2(self, psopt=psopt, isopt=isopt, border=border, + fit=fit, ao=ao, pspdf=pspdf2, ispdf=ispdf2): + layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf, + im2) + setattr(TestImg2Pdf, "test_layout_%03d_im2" % i, layout_handler_im2) + + files = os.listdir(os.path.join(HERE, "input")) + for with_pdfrw, test_name in [(a, b) for a in [True, False] + for b in files]: + inputf = os.path.join(HERE, "input", test_name) + if not os.path.isfile(inputf): + continue + outputf = os.path.join(HERE, "output", test_name+".pdf") + assert os.path.isfile(outputf) + + def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): + with open(f, "rb") as inf: + orig_imgdata = inf.read() + output = img2pdf.convert(orig_imgdata, nodate=True, + with_pdfrw=with_pdfrw) + from io import StringIO, BytesIO + from pdfrw import PdfReader, PdfName, PdfWriter + from pdfrw.py23_diffs import convert_load, convert_store + x = PdfReader(StringIO(convert_load(output))) + self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, + PdfName.Size]) + self.assertEqual(x.Size, '7') + self.assertEqual(x.Info, {}) + self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, + PdfName.Type]) + self.assertEqual(x.Root.Type, PdfName.Catalog) + self.assertEqual(sorted(x.Root.Pages.keys()), + [PdfName.Count, PdfName.Kids, PdfName.Type]) + self.assertEqual(x.Root.Pages.Count, '1') + 
self.assertEqual(x.Root.Pages.Type, PdfName.Pages) + self.assertEqual(len(x.Root.Pages.Kids), 1) + self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()), + [PdfName.Contents, PdfName.MediaBox, + PdfName.Parent, PdfName.Resources, PdfName.Type]) + self.assertEqual(x.Root.Pages.Kids[0].MediaBox, + ['0', '0', '115', '48']) + self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages) + self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page) + self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(), + [PdfName.XObject]) + self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(), + [PdfName.Im0]) + self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(), + [PdfName.Length]) + self.assertEqual(x.Root.Pages.Kids[0].Contents.Length, + str(len(x.Root.Pages.Kids[0].Contents.stream))) + self.assertEqual(x.Root.Pages.Kids[0].Contents.stream, + "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 " + "Do\nQ") + + imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0 + + # test if the filter is valid: + self.assertIn( + imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], + [PdfName.FlateDecode]]) + # test if the colorspace is valid + self.assertIn( + imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, + PdfName.DeviceCMYK]) + # test if the image has correct size + orig_img = Image.open(f) + self.assertEqual(imgprops.Width, str(orig_img.size[0])) + self.assertEqual(imgprops.Height, str(orig_img.size[1])) + # if the input file is a jpeg then it should've been copied + # verbatim into the PDF + if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: + self.assertEqual( + x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, + convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.FlateDecode]: + # otherwise, the data is flate encoded and has to be equal to + # the pixel data of the input image + imgdata = zlib.decompress( + convert_store( + x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) + colorspace = imgprops.ColorSpace + if colorspace == 
PdfName.DeviceGray: + colorspace = 'L' + elif colorspace == PdfName.DeviceRGB: + colorspace = 'RGB' + elif colorspace == PdfName.DeviceCMYK: + colorspace = 'CMYK' + else: + raise Exception("invalid colorspace") + im = Image.frombytes(colorspace, (int(imgprops.Width), + int(imgprops.Height)), + imgdata) + if orig_img.mode == '1': + orig_img = orig_img.convert("L") + elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): + orig_img = orig_img.convert("RGB") + self.assertEqual(im.tobytes(), orig_img.tobytes()) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have + # the close() method + try: + im.close() + except AttributeError: + pass + # now use pdfrw to parse and then write out both pdfs and check the + # result for equality + y = PdfReader(out) + outx = BytesIO() + outy = BytesIO() + xwriter = PdfWriter() + ywriter = PdfWriter() + xwriter.trailer = x + ywriter.trailer = y + xwriter.write(outx) + ywriter.write(outy) + self.assertEqual(outx.getvalue(), outy.getvalue()) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the + # close() method + try: + orig_img.close() + except AttributeError: + pass + if with_pdfrw: + setattr(TestImg2Pdf, "test_%s_with_pdfrw" % test_name, handle) + else: + setattr(TestImg2Pdf, "test_%s_without_pdfrw" % test_name, handle) + + return unittest.TestSuite(( + unittest.makeSuite(TestImg2Pdf), + )) diff --git a/src/tests/input/CMYK.jpg b/src/tests/input/CMYK.jpg new file mode 100644 index 0000000..44213a8 Binary files /dev/null and b/src/tests/input/CMYK.jpg differ diff --git a/src/tests/input/normal.jpg b/src/tests/input/normal.jpg new file mode 100644 index 0000000..2c036e9 Binary files /dev/null and b/src/tests/input/normal.jpg differ diff --git a/src/tests/input/normal.png b/src/tests/input/normal.png new file mode 100644 index 0000000..87b9a6e Binary files /dev/null and b/src/tests/input/normal.png differ diff --git a/src/tests/output/CMYK.jpg.pdf b/src/tests/output/CMYK.jpg.pdf new file mode 100644 index 
0000000..bfe67f3 Binary files /dev/null and b/src/tests/output/CMYK.jpg.pdf differ diff --git a/src/tests/output/CMYK.tif.pdf b/src/tests/output/CMYK.tif.pdf new file mode 100644 index 0000000..b00586b Binary files /dev/null and b/src/tests/output/CMYK.tif.pdf differ diff --git a/src/tests/output/normal.jpg.pdf b/src/tests/output/normal.jpg.pdf new file mode 100644 index 0000000..87d2645 Binary files /dev/null and b/src/tests/output/normal.jpg.pdf differ diff --git a/src/tests/output/normal.png.pdf b/src/tests/output/normal.png.pdf new file mode 100644 index 0000000..2628c5d Binary files /dev/null and b/src/tests/output/normal.png.pdf differ diff --git a/test_comp.sh b/test_comp.sh new file mode 100755 index 0000000..ae832e2 --- /dev/null +++ b/test_comp.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +if [ $# -ne 1 ]; then + echo "usage: $0 image" + exit +fi + +echo "converting image to pdf, trying all compressions imagemagick has to offer" +echo "if, as a result, Zip/FlateDecode should NOT be the lossless compression with the lowest size ratio, contact me j [dot] schauer [at] email [dot] de" +echo "also, send me the image in question" +echo + +imsize=`stat -c "%s" "$1"` + +for a in `convert -list compress`; do + echo "encode:\t$a" + convert "$1" -compress $a "`basename $1 .jpg`.pdf" + pdfimages "`basename $1 .jpg`.pdf" "`basename $1 .jpg`" + /bin/echo -ne "diff:\t" + diff=`compare -metric AE "$1" "\`basename $1 .jpg\`-000.ppm" null: 2>&1` + if [ "$diff" != "0" ]; then + echo "lossy" + else + echo "lossless" + fi + /bin/echo -ne "size:\t" + pdfsize=`stat -c "%s" "\`basename $1 .jpg\`.pdf"` + echo "scale=1;$pdfsize/$imsize" | bc + /bin/echo -ne "pdf:\t" + grep --max-count=1 --text /Filter "`basename $1 .jpg`.pdf" + echo +done -- cgit v1.2.3 From f71d3883871752e9ab72bb175c89a378df2af529 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 05:49:31 +0100 Subject: Import upstream version 0.2.3 --- PKG-INFO | 6 +- setup.py | 4 +- src/img2pdf.egg-info/PKG-INFO | 6 +- 
src/img2pdf.egg-info/SOURCES.txt | 2 + src/img2pdf.py | 173 ++++++++++++++++++++++++++++----------- src/jp2.py | 1 + src/tests/__init__.py | 44 +++++++++- src/tests/input/mono.png | Bin 0 -> 444 bytes src/tests/output/mono.png.pdf | Bin 0 -> 915 bytes 9 files changed, 180 insertions(+), 56 deletions(-) create mode 100644 src/tests/input/mono.png create mode 100644 src/tests/output/mono.png.pdf diff --git a/PKG-INFO b/PKG-INFO index b18e9d6..870fa2d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.1 +Version: 0.2.3 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 Description: img2pdf ======= @@ -157,7 +157,7 @@ Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English diff --git a/setup.py b/setup.py index 50de1e6..874380c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -VERSION = "0.2.1" +VERSION = "0.2.3" setup( name='img2pdf', @@ -18,7 +18,7 @@ setup( 'Environment :: Console', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: Implementation :: CPython', 'License :: OSI Approved :: GNU 
Lesser General Public License v3 ' '(LGPLv3)', diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index b18e9d6..870fa2d 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.1 +Version: 0.2.3 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 Description: img2pdf ======= @@ -157,7 +157,7 @@ Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt index 192589d..add31f1 100644 --- a/src/img2pdf.egg-info/SOURCES.txt +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -15,9 +15,11 @@ src/img2pdf.egg-info/top_level.txt src/img2pdf.egg-info/zip-safe src/tests/__init__.py src/tests/input/CMYK.jpg +src/tests/input/mono.png src/tests/input/normal.jpg src/tests/input/normal.png src/tests/output/CMYK.jpg.pdf src/tests/output/CMYK.tif.pdf +src/tests/output/mono.png.pdf src/tests/output/normal.jpg.pdf src/tests/output/normal.png.pdf \ No newline at end of file diff --git a/src/img2pdf.py b/src/img2pdf.py index 2042d13..20fe784 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -28,7 +28,7 @@ from enum import Enum from io import BytesIO import 
logging -__version__ = "0.2.1" +__version__ = "0.2.3" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -58,7 +58,7 @@ PageOrientation = Enum('PageOrientation', 'portrait landscape') Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other') -ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 other') +ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 other') PageMode = Enum('PageMode', 'none outlines thumbs') @@ -167,6 +167,8 @@ class MyPdfDict(object): class MyPdfName(): def __getattr__(self, name): return b'/' + name.encode('ascii') + + MyPdfName = MyPdfName() @@ -314,7 +316,7 @@ class pdfdoc(object): self.info[PdfName.Author] = PdfString.encode(author) if creator is not None: self.info[PdfName.Creator] = PdfString.encode(creator) - if producer is not None: + if producer is not None and producer != "": self.info[PdfName.Producer] = PdfString.encode(producer) if creationdate is not None: self.info[PdfName.CreationDate] = \ @@ -354,14 +356,15 @@ class pdfdoc(object): imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, pageheight): if self.with_pdfrw: - from pdfrw import PdfDict, PdfName + from pdfrw import PdfDict, PdfName, PdfObject from pdfrw.py23_diffs import convert_load else: PdfDict = MyPdfDict PdfName = MyPdfName + PdfObject = MyPdfObject convert_load = my_convert_load - if color == Colorspace.L: + if color == Colorspace['1'] or color == Colorspace.L: colorspace = PdfName.DeviceGray elif color == Colorspace.RGB: colorspace = PdfName.DeviceRGB @@ -372,11 +375,14 @@ class pdfdoc(object): % color.name) # either embed the whole jpeg or deflate the bitmap representation + logging.debug(imgformat) if imgformat is ImageFormat.JPEG: ofilter = [PdfName.DCTDecode] elif imgformat is ImageFormat.JPEG2000: ofilter = [PdfName.JPXDecode] self.writer.version = "1.5" # jpeg2000 needs pdf 1.5 + elif imgformat is ImageFormat.CCITTGroup4: + ofilter = [PdfName.CCITTFaxDecode] else: ofilter = [PdfName.FlateDecode] @@ -389,12 +395,23 @@ class 
pdfdoc(object): image[PdfName.Height] = imgheightpx image[PdfName.ColorSpace] = colorspace # hardcoded as PIL doesn't provide bits for non-jpeg formats - image[PdfName.BitsPerComponent] = 8 + if imgformat is ImageFormat.CCITTGroup4: + image[PdfName.BitsPerComponent] = 1 + else: + image[PdfName.BitsPerComponent] = 8 if color == Colorspace['CMYK;I']: # Inverts all four channels image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0] + if imgformat is ImageFormat.CCITTGroup4: + decodeparms = PdfDict() + decodeparms[PdfName.K] = -1 + decodeparms[PdfName.BlackIs1] = PdfObject('true') + decodeparms[PdfName.Columns] = imgwidthpx + decodeparms[PdfName.Rows] = imgheightpx + image[PdfName.DecodeParms] = [decodeparms] + text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii") @@ -594,6 +611,45 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): return (color, ndpi, imgwidthpx, imgheightpx) +def transcode_monochrome(imgdata): + """Convert the open PIL.Image imgdata to compressed CCITT Group4 data""" + + from PIL import TiffImagePlugin + + logging.debug("Converting monochrome to CCITT Group4") + + # Convert the image to Group 4 in memory. If libtiff is not installed and + # Pillow is not compiled against it, .save() will raise an exception. + newimgio = BytesIO() + imgdata.save(newimgio, format='TIFF', compression='group4') + + # Open new image in memory + newimgio.seek(0) + newimg = Image.open(newimgio) + + # If Pillow is passed an invalid compression argument it will ignore it; + # make sure the image actually got compressed. + if newimg.info['compression'] != 'group4': + raise ValueError("Image not compressed as expected") + + # Read the TIFF tags to find the offset(s) of the compressed data strips. 
+ strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS] + strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] + rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP] + + # PIL always seems to create a single strip even for very large TIFFs when + # it saves images, so assume we only have to read a single strip. + # A test ~10 GPixel image was still encoded as a single strip. Just to be + # safe check throw an error if there is more than one offset. + if len(strip_offsets) > 1: + raise NotImplementedError("Transcoding multiple strips not supported") + + newimgio.seek(strip_offsets[0]) + ccittdata = newimgio.read(strip_bytes[0]) + + return ccittdata + + def read_images(rawdata, colorspace, first_frame_only=False): im = BytesIO(rawdata) im.seek(0) @@ -648,11 +704,20 @@ def read_images(rawdata, colorspace, first_frame_only=False): color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace) - # because we do not support /CCITTFaxDecode + newimg = None if color == Colorspace['1']: - logging.debug("Converting colorspace 1 to L") - newimg = imgdata.convert('L') - color = Colorspace.L + try: + ccittdata = transcode_monochrome(imgdata) + imgformat = ImageFormat.CCITTGroup4 + result.append((color, ndpi, imgformat, ccittdata, + imgwidthpx, imgheightpx)) + img_page_count += 1 + continue + except Exception as e: + logging.debug(e) + logging.debug("Converting colorspace 1 to L") + newimg = imgdata.convert('L') + color = Colorspace.L elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, Colorspace["CMYK;I"]]: logging.debug("Colorspace is OK: %s", color) @@ -927,12 +992,22 @@ def convert(*images, title=None, viewer_fit_window, viewer_center_window, viewer_fullscreen, with_pdfrw) + # backwards compatibility with older img2pdf versions where the first + # argument to the function had to be given as a list + if len(images) == 1: + # if only one argument was given and it is a list, expand it + if 
isinstance(images[0], (list, tuple)): + images = images[0] + for img in images: # img is allowed to be a path, a binary string representing image data # or a file-like object (really anything that implements read()) try: rawdata = img.read() except AttributeError: + if not isinstance(img, (str, bytes)): + raise TypeError( + "Neither implements read() nor is str or bytes") # the thing doesn't have a read() function, so try if we can treat # it as a file name try: @@ -1256,10 +1331,11 @@ useful to convert JPEG and JPEG2000 images to PDF. The output is sent to standard output so that it can be redirected into a file or to another program as part of a shell pipe. To directly write the output into a file, use the -o or --output option. + +Options: ''', epilog='''\ -Colorspace - +Colorspace: Currently, the colorspace must be forced for JPEG 2000 images that are not in the RGB colorspace. Available colorspace options are based on Python Imaging Library (PIL) short handles. @@ -1270,8 +1346,7 @@ Colorspace CMYK CMYK color CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe) -Paper sizes - +Paper sizes: You can specify the short hand paper size names shown in the first column in the table below as arguments to the --pagesize and --imgsize options. The width and height they are mapping to is shown in the second column. Giving @@ -1282,8 +1357,7 @@ Paper sizes %s -Fit options - +Fit options: The img2pdf options for the --fit argument are shown in the first column in the table below. The function of these options can be mapped to the geometry operators of imagemagick. For users who are familiar with imagemagick, the @@ -1307,8 +1381,32 @@ Fit options enlarge | < | Y | Enlarges an image with dimensions smaller than the given | | | ones (and otherwise behaves like "into"). -Examples +Argument parsing: + Argument long options can be abbreviated to a prefix if the abbreviation is + anambiguous. That is, the prefix must match a unique option. 
+ + Beware of your shell interpreting argument values as special characters (like + the semicolon in the CMYK;I colorspace option). If in doubt, put the argument + values in single quotes. + + If you want an argument value to start with one or more minus characters, you + must use the long option name and join them with an equal sign like so: + + $ img2pdf --author=--test-- + + If your input file name starts with one or more minus characters, either + separate the input files from the other arguments by two minus signs: + $ img2pdf -- --my-file-starts-with-two-minuses.jpg + + Or be more explicit about its relative path by prepending a ./: + + $ img2pdf ./--my-file-starts-with-two-minuses.jpg + + The order of non-positional arguments (all arguments other than the input + images) does not matter. + +Examples: Lines starting with a dollar sign denote commands you can enter into your terminal. The dollar sign signifies your command prompt. It is not part of the command you type. @@ -1340,31 +1438,9 @@ Examples $ img2pdf --output out.pdf --colorspace L input.jp2 -Argument parsing - - Argument long options can be abbreviated to a prefix if the abbreviation is - anambiguous. That is, the prefix must match a unique option. - - Beware of your shell interpreting argument values as special characters (like - the semicolon in the CMYK;I colorspace option). If in doubt, put the argument - values in single quotes. 
- - If you want an argument value to start with one or more minus characters, you - must use the long option name and join them with an equal sign like so: - - $ img2pdf --author=--test-- - - If your input file name starts with one or more minus characters, either - separate the input files from the other arguments by two minus signs: - - $ img2pdf -- --my-file-starts-with-two-minuses.jpg +Written by Johannes 'josch' Schauer - Or be more explicit about its relative path by prepending a ./: - - $ img2pdf ./--my-file-starts-with-two-minuses.jpg - - The order of non-positional arguments (all arguments other than the input - images) does not matter. +Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues ''' % rendered_papersizes) parser.add_argument( @@ -1385,7 +1461,7 @@ Argument parsing outargs = parser.add_argument_group( title='General output arguments', - description='') + description='Arguments controlling the output format.') outargs.add_argument( '-o', '--output', metavar='out', type=argparse.FileType('wb'), @@ -1428,8 +1504,7 @@ RGB.''') sizeargs = parser.add_argument_group( title='Image and page size and layout arguments', description='''\ - -Every input image will be placed on its own page. The image size is controlled +Every input image will be placed on its own page. The image size is controlled by the dpi value of the input image or, if unset or missing, the default dpi of %.2f. By default, each page will have the same size as the image it shows. Thus, there will be no visible border between the image and the page border by @@ -1518,8 +1593,10 @@ of the input image. If the orientation of a page gets flipped, then so do the values set via the --border option. 
''') - metaargs = parser.add_argument_group(title='Arguments setting metadata', - description='') + metaargs = parser.add_argument_group( + title='Arguments setting metadata', + description='Options handling embedded timestamps, title and author ' + 'information.') metaargs.add_argument( '--title', metavar='title', type=str, help='Sets the title metadata value') @@ -1532,7 +1609,8 @@ values set via the --border option. metaargs.add_argument( '--producer', metavar='producer', type=str, default="img2pdf " + __version__, - help='Sets the producer metadata value (default is: img2pdf)') + help='Sets the producer metadata value ' + '(default is: img2pdf ' + __version__ + ')') metaargs.add_argument( '--creationdate', metavar='creationdate', type=valid_date, help='Sets the UTC creation date metadata value in YYYY-MM-DD or ' @@ -1646,5 +1724,6 @@ values set via the --border option. traceback.print_exc(file=sys.stderr) exit(1) + if __name__ == '__main__': main() diff --git a/src/jp2.py b/src/jp2.py index 7f61312..30edb7e 100644 --- a/src/jp2.py +++ b/src/jp2.py @@ -116,6 +116,7 @@ def parsejp2(data): # retrieving the dpi is optional so we do not error out if not present return (width, height, colorspace, hdpi, vdpi) + if __name__ == "__main__": import sys width, height, colorspace = parsejp2(open(sys.argv[1]).read()) diff --git a/src/tests/__init__.py b/src/tests/__init__.py index b668054..506fc48 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -4,6 +4,8 @@ import os import img2pdf import zlib from PIL import Image +from io import BytesIO +import struct HERE = os.path.dirname(__file__) @@ -396,6 +398,29 @@ layout_test_cases = [ ] +def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4): + # Quick and dirty TIFF header builder from + # https://stackoverflow.com/questions/2641770 + tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h' + return struct.pack( + tiff_header_struct, + b'II', # Byte order indication: Little endian + 42, # 
Version number (always 42) + 8, # Offset to first IFD + 8, # Number of tags in IFD + 256, 4, 1, width, # ImageWidth, LONG, 1, width + 257, 4, 1, height, # ImageLength, LONG, 1, length + 258, 3, 1, 1, # BitsPerSample, SHORT, 1, 1 + 259, 3, 1, ccitt_group, # Compression, SHORT, 1, 4 = CCITT Group 4 + 262, 3, 1, 1, # PhotometricInterpretation, SHORT, 1, 1 = BlackIsZero + 273, 4, 1, struct.calcsize( + tiff_header_struct), # StripOffsets, LONG, 1, len of header + 278, 4, 1, height, # RowsPerStrip, LONG, 1, length + 279, 4, 1, img_size, # StripByteCounts, LONG, 1, size of image + 0 # last IFD + ) + + def test_suite(): class TestImg2Pdf(unittest.TestCase): pass @@ -485,7 +510,8 @@ def test_suite(): # test if the filter is valid: self.assertIn( imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], - [PdfName.FlateDecode]]) + [PdfName.FlateDecode], + [PdfName.CCITTFaxDecode]]) # test if the colorspace is valid self.assertIn( imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, @@ -500,6 +526,22 @@ def test_suite(): self.assertEqual( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.CCITTFaxDecode]: + tiff_header = tiff_header_for_ccitt( + int(imgprops.Width), int(imgprops.Height), + int(imgprops.Length), 4) + imgio = BytesIO() + imgio.write(tiff_header) + imgio.write(convert_store( + x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) + imgio.seek(0) + im = Image.open(imgio) + self.assertEqual(im.tobytes(), orig_img.tobytes()) + try: + im.close() + except AttributeError: + pass + elif imgprops.Filter == [PdfName.FlateDecode]: # otherwise, the data is flate encoded and has to be equal to # the pixel data of the input image diff --git a/src/tests/input/mono.png b/src/tests/input/mono.png new file mode 100644 index 0000000..59b17ad Binary files /dev/null and b/src/tests/input/mono.png differ diff --git a/src/tests/output/mono.png.pdf b/src/tests/output/mono.png.pdf new file mode 100644 index 0000000..eda3ec7 
Binary files /dev/null and b/src/tests/output/mono.png.pdf differ -- cgit v1.2.3 From 89e8093b199ab2ad8e2b628b69e5046966a9df08 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 05:50:35 +0100 Subject: debian/changelog: add entry for 0.2.3-1 --- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index 4f70610..15b15cb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +img2pdf (0.2.3-1) UNRELEASED; urgency=medium + + * New upstream release + + -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 + img2pdf (0.2.1-1) unstable; urgency=medium * New upstream release (closes: #833376) -- cgit v1.2.3 From aefd85cc7278add7216d977b3605160accfa7ce4 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 05:59:30 +0100 Subject: Add Vcs-Browser and Vcs-Git for dgit --- debian/changelog | 1 + debian/control | 2 ++ 2 files changed, 3 insertions(+) diff --git a/debian/changelog b/debian/changelog index 15b15cb..b394ce8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * New upstream release + * Add Vcs-Browser and Vcs-Git for dgit -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/control b/debian/control index a9bbcfa..268f728 100644 --- a/debian/control +++ b/debian/control @@ -4,6 +4,8 @@ Section: python Priority: optional Build-Depends: dh-python, python3-setuptools, python3-all, debhelper, python3-pil, python3-pdfrw, help2man, python3-pkg-resources Standards-Version: 3.9.6 +Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ +Vcs-Git: https://git.dgit.debian.org/img2pdf Homepage: https://gitlab.mister-muffin.de/josch/img2pdf Package: img2pdf -- cgit v1.2.3 From 33ca9a3956e99cf7c0afcb795fe6f4324a42ae26 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:02:39 +0100 Subject: Let img2pdf depend on the exact same version of python3-img2pdf --- debian/changelog 
| 1 + debian/control | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index b394ce8..8cf34a7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * New upstream release * Add Vcs-Browser and Vcs-Git for dgit + * Let img2pdf depend on the exact same version of python3-img2pdf -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/control b/debian/control index 268f728..b56dbad 100644 --- a/debian/control +++ b/debian/control @@ -11,7 +11,7 @@ Homepage: https://gitlab.mister-muffin.de/josch/img2pdf Package: img2pdf Architecture: all Section: utils -Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf +Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf (= ${binary:Version}) Description: Lossless conversion of raster images to PDF This program will take a list of raster images and produce a PDF file with the images embedded in it. JPEG and JPEG2000 images will be included without -- cgit v1.2.3 From 23aaed21206c9c32971ddbe26e1080b101d0ba28 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:07:06 +0100 Subject: Add autopkgtests --- debian/changelog | 1 + debian/tests/control | 3 +++ debian/tests/default | 7 +++++++ 3 files changed, 11 insertions(+) create mode 100644 debian/tests/control create mode 100644 debian/tests/default diff --git a/debian/changelog b/debian/changelog index 8cf34a7..1e4d4ac 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * New upstream release * Add Vcs-Browser and Vcs-Git for dgit * Let img2pdf depend on the exact same version of python3-img2pdf + * Add autopkgtests -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..d46241f --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,3 @@ +Tests: default +Restrictions: 
allow-stderr +Depends: @, python3-pdfrw diff --git a/debian/tests/default b/debian/tests/default new file mode 100644 index 0000000..6230e1f --- /dev/null +++ b/debian/tests/default @@ -0,0 +1,7 @@ +#!/bin/sh +set -exu + +for f in CMYK.jpg mono.png normal.jpg normal.png; do + img2pdf --nodate --producer="" src/tests/input/$f -o $ADTTMP/$f.pdf + diff -u --text src/tests/output/$f.pdf $ADTTMP/$f.pdf +done -- cgit v1.2.3 From cc52115e2c38674e3712603a8b4fb0ac3877cbc0 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:13:35 +0100 Subject: Add explicit dependency on python3-pkg-resources to img2pdf --- debian/changelog | 3 +++ debian/control | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 1e4d4ac..5e7d3c1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,9 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * Add Vcs-Browser and Vcs-Git for dgit * Let img2pdf depend on the exact same version of python3-img2pdf * Add autopkgtests + * Move python3-pkg-resources from Build-Depends to Depends of img2pdf. This + is the actual fix for #818617 which was wrongly fixed in version 0.2.0-2 + by adding python3-pkg-resources to Build-Depends. 
-- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/control b/debian/control index b56dbad..20573d2 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: img2pdf Maintainer: Johannes Schauer Section: python Priority: optional -Build-Depends: dh-python, python3-setuptools, python3-all, debhelper, python3-pil, python3-pdfrw, help2man, python3-pkg-resources +Build-Depends: dh-python, python3-setuptools, python3-all, debhelper, python3-pil, python3-pdfrw, help2man Standards-Version: 3.9.6 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf @@ -11,7 +11,7 @@ Homepage: https://gitlab.mister-muffin.de/josch/img2pdf Package: img2pdf Architecture: all Section: utils -Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf (= ${binary:Version}) +Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf (= ${binary:Version}), python3-pkg-resources Description: Lossless conversion of raster images to PDF This program will take a list of raster images and produce a PDF file with the images embedded in it. JPEG and JPEG2000 images will be included without -- cgit v1.2.3 From fc252787ba7b3f39581138bf094a1077e04e8953 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:14:37 +0100 Subject: Do not remove src/img2pdf.egg-info in clean target as it's part of the source package --- debian/changelog | 2 ++ debian/rules | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 5e7d3c1..8b99d3b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,8 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * Move python3-pkg-resources from Build-Depends to Depends of img2pdf. This is the actual fix for #818617 which was wrongly fixed in version 0.2.0-2 by adding python3-pkg-resources to Build-Depends. 
+ * Do not remove src/img2pdf.egg-info in clean target as it's part of the + source package -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/rules b/debian/rules index 43f4ec5..76520dd 100755 --- a/debian/rules +++ b/debian/rules @@ -4,7 +4,6 @@ dh $@ --with python3 --buildsystem=pybuild override_dh_auto_clean: - rm -rf src/img2pdf.egg-info rm -f img2pdf.1 dh_auto_clean -- cgit v1.2.3 From cf846800f8df07d43b85624c441c6d8314350c44 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:18:39 +0100 Subject: Enhance the long and short description --- debian/changelog | 1 + debian/control | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 8b99d3b..690c346 100644 --- a/debian/changelog +++ b/debian/changelog @@ -9,6 +9,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium by adding python3-pkg-resources to Build-Depends. * Do not remove src/img2pdf.egg-info in clean target as it's part of the source package + * Enhance the long and short description -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/control b/debian/control index 20573d2..a0d8b99 100644 --- a/debian/control +++ b/debian/control @@ -22,12 +22,14 @@ Description: Lossless conversion of raster images to PDF container with a quality to filesize ratio that is typically better (in case of JPEG and JPEG2000 images) or equal (in case of other formats) than that of existing tools. + . + This package contains the executable. Package: python3-img2pdf Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Suggests: python3-pdfrw -Description: Lossless conversion of raster images to PDF +Description: Lossless conversion of raster images to PDF (library) This module will take a list of raster images and produce a PDF file with the images embedded in it. JPEG and JPEG2000 images will be included without recompression. 
Raster images in other formats will be included with zip/flate @@ -40,3 +42,5 @@ Description: Lossless conversion of raster images to PDF . Img2pdf includes its own PDF writer but will use the pdfrw module if available instead. + . + This package contains the Python library. -- cgit v1.2.3 From 5c39dada7fc61f32949a92affe6913d4fe2943c2 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:19:28 +0100 Subject: Remove leftover debian/python3-jp2.substvars --- debian/changelog | 1 + debian/python3-jp2.substvars | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 debian/python3-jp2.substvars diff --git a/debian/changelog b/debian/changelog index 690c346..2c9f478 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * Do not remove src/img2pdf.egg-info in clean target as it's part of the source package * Enhance the long and short description + * Remove leftover debian/python3-jp2.substvars -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/python3-jp2.substvars b/debian/python3-jp2.substvars deleted file mode 100644 index 978fc8b..0000000 --- a/debian/python3-jp2.substvars +++ /dev/null @@ -1,2 +0,0 @@ -misc:Depends= -misc:Pre-Depends= -- cgit v1.2.3 From 5707228a35d6ebfbe10df84623a4d607d5afe872 Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:20:52 +0100 Subject: Bump debhelper compat level to 10 --- debian/changelog | 1 + debian/compat | 2 +- debian/control | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 2c9f478..0f922e5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -11,6 +11,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium source package * Enhance the long and short description * Remove leftover debian/python3-jp2.substvars + * Bump debhelper compat level to 10 -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/compat 
b/debian/compat index ec63514..f599e28 100644 --- a/debian/compat +++ b/debian/compat @@ -1 +1 @@ -9 +10 diff --git a/debian/control b/debian/control index a0d8b99..cccb189 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: img2pdf Maintainer: Johannes Schauer Section: python Priority: optional -Build-Depends: dh-python, python3-setuptools, python3-all, debhelper, python3-pil, python3-pdfrw, help2man +Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 10), python3-pil, python3-pdfrw, help2man Standards-Version: 3.9.6 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf -- cgit v1.2.3 From 667cc2b2010b1f97da7736bb198839ca708a442a Mon Sep 17 00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:34:51 +0100 Subject: Bump Standards-Version to 3.9.8 (no changes required) --- debian/changelog | 1 + debian/control | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 0f922e5..30189ef 100644 --- a/debian/changelog +++ b/debian/changelog @@ -12,6 +12,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * Enhance the long and short description * Remove leftover debian/python3-jp2.substvars * Bump debhelper compat level to 10 + * Bump Standards-Version to 3.9.8 (no changes required) -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 diff --git a/debian/control b/debian/control index cccb189..466cb98 100644 --- a/debian/control +++ b/debian/control @@ -3,7 +3,7 @@ Maintainer: Johannes Schauer Section: python Priority: optional Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 10), python3-pil, python3-pdfrw, help2man -Standards-Version: 3.9.6 +Standards-Version: 3.9.8 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf Homepage: https://gitlab.mister-muffin.de/josch/img2pdf -- cgit v1.2.3 From 20da8c12b9524ea6d405cf12df5a2f426b60b4c5 Mon Sep 17 
00:00:00 2001 From: Johannes Schauer Date: Fri, 20 Jan 2017 06:58:07 +0100 Subject: Upload 0.2.3-1 to unstable --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 30189ef..003c69d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -img2pdf (0.2.3-1) UNRELEASED; urgency=medium +img2pdf (0.2.3-1) unstable; urgency=medium * New upstream release * Add Vcs-Browser and Vcs-Git for dgit @@ -14,7 +14,7 @@ img2pdf (0.2.3-1) UNRELEASED; urgency=medium * Bump debhelper compat level to 10 * Bump Standards-Version to 3.9.8 (no changes required) - -- Johannes Schauer Fri, 20 Jan 2017 05:49:33 +0100 + -- Johannes Schauer Fri, 20 Jan 2017 06:57:27 +0100 img2pdf (0.2.1-1) unstable; urgency=medium -- cgit v1.2.3 From aa564ac57de87724808ae3c2c6baf92688d181cc Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:21:40 +0200 Subject: Import upstream version 0.3.0 --- CHANGES.rst | 108 +++++++++++ MANIFEST.in | 3 + PKG-INFO | 106 ++++++---- README.md | 98 ++++++---- setup.cfg | 1 - setup.py | 28 ++- src/img2pdf.egg-info/PKG-INFO | 106 ++++++---- src/img2pdf.egg-info/SOURCES.txt | 8 + src/img2pdf.egg-info/requires.txt | 3 + src/img2pdf.py | 383 +++++++++++++++++++++++++++---------- src/tests/__init__.py | 231 +++++++++++++--------- src/tests/input/CMYK.tif | Bin 0 -> 22286 bytes src/tests/input/animation.gif | Bin 0 -> 1930 bytes src/tests/input/gray.png | Bin 0 -> 814 bytes src/tests/input/mono.tif | Bin 0 -> 262 bytes src/tests/input/normal.png | Bin 1130 -> 4992 bytes src/tests/output/CMYK.jpg.pdf | Bin 5560 -> 5558 bytes src/tests/output/CMYK.tif.pdf | Bin 1724 -> 1722 bytes src/tests/output/animation.gif.pdf | Bin 0 -> 6070 bytes src/tests/output/gray.png.pdf | Bin 0 -> 1329 bytes src/tests/output/mono.png.pdf | Bin 915 -> 958 bytes src/tests/output/mono.tif.pdf | Bin 0 -> 921 bytes src/tests/output/normal.jpg.pdf | Bin 3091 -> 3089 bytes 
src/tests/output/normal.png.pdf | Bin 1573 -> 1670 bytes 24 files changed, 762 insertions(+), 313 deletions(-) create mode 100644 CHANGES.rst create mode 100644 src/tests/input/CMYK.tif create mode 100644 src/tests/input/animation.gif create mode 100644 src/tests/input/gray.png create mode 100644 src/tests/input/mono.tif create mode 100644 src/tests/output/animation.gif.pdf create mode 100644 src/tests/output/gray.png.pdf create mode 100644 src/tests/output/mono.tif.pdf diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..d4476a8 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,108 @@ +======= +CHANGES +======= + +0.3.0 +----- + + - Store non-jpeg images using PNG compression + - Support arbitrarily large pages via PDF /UserUnit field + - Disallow input with alpha channel as it cannot be preserved + - Add option --pillow-limit-break to support very large input + +0.2.4 +----- + + - Restore support for Python 2.7 + - Add support for PyPy + - Add support for testing using tox + +0.2.3 +----- + + - version number bump for botched pypi upload... 
+ +0.2.2 +----- + + - automatic monochrome CCITT Group4 encoding via Pillow/libtiff + +0.2.1 +----- + + - set img2pdf as /producer value + - support multi-frame images like multipage TIFF and animated GIF + - support for palette images like GIF + - support all colorspaces and imageformats known by PIL + - read horizontal and vertical dpi from JPEG2000 files + +0.2.0 +----- + + - now Python3 only + - pep8 compliant code + - update my email to josch@mister-muffin.de + - move from github to gitlab.mister-muffin.de/josch/img2pdf + - use logging module + - add extensive test suite + - ability to read from standard input + - pdf writer: + - make more compatible with the interface of pdfrw module + - print floats which are equal to their integer conversion as integer + - do not print trailing zeroes for floating point numbers + - print more linebreaks + - add binary string at beginning of PDF to indicate that the PDF + contains binary data + - handle datetime and unicode strings by using utf-16-be encoding + - new options (see --help for more details): + - --without-pdfrw + - --imgsize + - --border + - --fit + - --auto-orient + - --viewer-panes + - --viewer-initial-page + - --viewer-magnification + - --viewer-page-layout + - --viewer-fit-window + - --viewer-center-window + - --viewer-fullscreen + - remove short options for metadata command line arguments + - correctly encode and escape non-ascii metadata + - explicitly store date in UTC and allow parsing all date formats understood + by dateutil and `date --date` + +0.1.5 +----- + +- Enable support for CMYK images +- Rework test suite +- support file objects as input + +0.1.4 +----- + +- add Python 3 support +- make output reproducible by sorting and --nodate option + +0.1.3 +----- + +- Avoid leaking file descriptors +- Convert unrecognized colorspaces to RGB + +0.1.1 +----- + +- allow running src/img2pdf.py standalone +- license change from GPL to LGPL +- Add pillow 2.4.0 support +- add options to specify pdf dimensions in 
points + +0.1.0 (unreleased) +------------------ + +- Initial PyPI release. +- Modified code to create proper package. +- Added tests. +- Added console script entry point. diff --git a/MANIFEST.in b/MANIFEST.in index 534bab3..4ee2b37 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,9 @@ include README.md include test_comp.sh +include CHANGES.rst recursive-include src *.jpg recursive-include src *.pdf recursive-include src *.png +recursive-include src *.tif +recursive-include src *.gif recursive-include src *.py diff --git a/PKG-INFO b/PKG-INFO index 870fa2d..e3ecf4b 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,43 +1,46 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.3 +Version: 0.3.0 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.0 +Description-Content-Type: UNKNOWN Description: img2pdf ======= Losslessly convert raster images to PDF. The file size will not unnecessarily - increase. One major application would be a number of scans made in JPEG format - which should now become part of a single PDF document. Existing solutions - would either re-encode the input JPEG files (leading to quality loss) or store - them in the zip/flate format which results into the PDF becoming unnecessarily - large in terms of its file size. + increase. It can for example be used to create a PDF document from a number of + scans that are only available in JPEG format. Existing solutions would either + re-encode the input JPEG files (leading to quality loss) or store them in the + zip/flate format which results into the PDF becoming unnecessarily large in + terms of its file size. 
Background ---------- - Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by - embedding them without re-encoding. I wrote this piece of python code. - because I was missing a tool to do this automatically. Img2pdf basically just - wraps JPEG images into the PDF container as they are. + Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to + PDF by embedding them into the PDF without re-encoding them. This is what + img2pdf does. It thus treats the PDF format merely as a container format for + storing one or more JPEGs or PNGs without re-encoding the images themselves. - If you know an existing tool which allows one to embed JPEG and JPEG2000 images - into a PDF container without recompression, please contact me so that I can put - this code into the garbage bin. + If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 + images into a PDF container without recompression, please contact me so that I + can put this code into the garbage bin. Functionality ------------- - This program will take a list of images and produce a PDF file with the images - embedded in it. JPEG and JPEG2000 images will be included without - recompression. Raster images in other formats will be included with zip/flate - encoding which usually leads to an increase in the resulting size because - formats like png compress better than PDF which just zip/flate compresses the - RGB data. As a result, this tool is able to losslessly wrap images into a PDF + This program will take a list of raster images and produce a PDF file with the + images embedded in it. PNG, JPEG and JPEG2000 images will be included without + recompression and the resulting PDF will only be slightly larger than the input + images due to the overhead of the PDF container. Raster images in other + formats (like gif or tif) will be included using the lossless zip/flate + encoding using the PNG Paeth predictor. 
+ + As a result, this tool is able to losslessly wrap raster images into a PDF container with a quality to filesize ratio that is typically better (in case of JPEG and JPEG2000 images) or equal (in case of other formats) than that of existing tools. @@ -61,13 +64,17 @@ Description: img2pdf However, this approach will result in PDF files that are a few times larger than the input JPEG or JPEG2000 file. - img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF + Furthermore, when converting PNG images, popular tools like imagemagick use + flate encoding without a predictor. This means that image file size ends up + being several orders of magnitude larger than necessary. + + img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF container without additional overhead (aside from the PDF structure itself), save other graphics formats using lossless zip compression, and produce multi-page PDF files when more than one input image is given. - Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf - is several times faster than with other tools. + Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with + img2pdf is several times faster than with other tools. Usage ----- @@ -76,7 +83,9 @@ Description: img2pdf descriptor. If no output file is specified with the `-o`/`--output` option, output will be - done to stdout. + done to stdout. A typical invocation is: + + img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: @@ -89,14 +98,6 @@ Description: img2pdf If you find a JPEG or JPEG2000 file that, when embedded cannot be read by the Adobe Acrobat Reader, please contact me. - For lossless conversion of formats other than JPEG or JPEG2000, zip/flate - encoding is used. This choice is based on tests I did with a number of images. - I converted them into PDF using the lossless variants of the compression - formats offered by imagemagick. 
In all my tests, zip/flate encoding performed - best. You can verify my findings using the test_comp.sh script with any input - image given as a commandline argument. If you find an input file that is - outperformed by another lossless compression method, contact me. - I have not yet figured out how to determine the colorspace of JPEG2000 files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with other colorspaces, you must explicitly specify it using the `--colorspace` @@ -123,19 +124,19 @@ Description: img2pdf You can then install the package using: - $ pip install img2pdf + $ pip3 install img2pdf If you prefer to install from source code use: $ cd img2pdf/ - $ pip install . + $ pip3 install . To test the console script without installing the package on your system, use virtualenv: $ cd img2pdf/ $ virtualenv ve - $ ve/bin/pip install . + $ ve/bin/pip3 install . You can then test the converter using: @@ -144,10 +145,36 @@ Description: img2pdf The package can also be used as a library: import img2pdf - pdf_bytes = img2pdf.convert('test.jpg') - file = open("name.pdf","wb") - file.write(pdf_bytes) + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...")) + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with 
open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) Keywords: jpeg pdf converter Platform: UNKNOWN @@ -156,9 +183,12 @@ Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English Classifier: Operating System :: OS Independent diff --git a/README.md b/README.md index 27637d6..249abb8 100644 --- a/README.md +++ b/README.md @@ -2,33 +2,35 @@ img2pdf ======= Losslessly convert raster images to PDF. The file size will not unnecessarily -increase. One major application would be a number of scans made in JPEG format -which should now become part of a single PDF document. Existing solutions -would either re-encode the input JPEG files (leading to quality loss) or store -them in the zip/flate format which results into the PDF becoming unnecessarily -large in terms of its file size. +increase. It can for example be used to create a PDF document from a number of +scans that are only available in JPEG format. Existing solutions would either +re-encode the input JPEG files (leading to quality loss) or store them in the +zip/flate format which results into the PDF becoming unnecessarily large in +terms of its file size. Background ---------- -Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by -embedding them without re-encoding. I wrote this piece of python code. -because I was missing a tool to do this automatically. 
Img2pdf basically just -wraps JPEG images into the PDF container as they are. +Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to +PDF by embedding them into the PDF without re-encoding them. This is what +img2pdf does. It thus treats the PDF format merely as a container format for +storing one or more JPEGs or PNGs without re-encoding the images themselves. -If you know an existing tool which allows one to embed JPEG and JPEG2000 images -into a PDF container without recompression, please contact me so that I can put -this code into the garbage bin. +If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 +images into a PDF container without recompression, please contact me so that I +can put this code into the garbage bin. Functionality ------------- -This program will take a list of images and produce a PDF file with the images -embedded in it. JPEG and JPEG2000 images will be included without -recompression. Raster images in other formats will be included with zip/flate -encoding which usually leads to an increase in the resulting size because -formats like png compress better than PDF which just zip/flate compresses the -RGB data. As a result, this tool is able to losslessly wrap images into a PDF +This program will take a list of raster images and produce a PDF file with the +images embedded in it. PNG, JPEG and JPEG2000 images will be included without +recompression and the resulting PDF will only be slightly larger than the input +images due to the overhead of the PDF container. Raster images in other +formats (like gif or tif) will be included using the lossless zip/flate +encoding using the PNG Paeth predictor. + +As a result, this tool is able to losslessly wrap raster images into a PDF container with a quality to filesize ratio that is typically better (in case of JPEG and JPEG2000 images) or equal (in case of other formats) than that of existing tools. 
@@ -52,13 +54,17 @@ imagemagick, one has to use zip compression: However, this approach will result in PDF files that are a few times larger than the input JPEG or JPEG2000 file. -img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF +Furthermore, when converting PNG images, popular tools like imagemagick use +flate encoding without a predictor. This means that image file size ends up +being several orders of magnitude larger than necessary. + +img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF container without additional overhead (aside from the PDF structure itself), save other graphics formats using lossless zip compression, and produce multi-page PDF files when more than one input image is given. -Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf -is several times faster than with other tools. +Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with +img2pdf is several times faster than with other tools. Usage ----- @@ -67,7 +73,9 @@ The images must be provided as files because img2pdf needs to seek in the file descriptor. If no output file is specified with the `-o`/`--output` option, output will be -done to stdout. +done to stdout. A typical invocation is: + + img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: @@ -80,14 +88,6 @@ Bugs If you find a JPEG or JPEG2000 file that, when embedded cannot be read by the Adobe Acrobat Reader, please contact me. -For lossless conversion of formats other than JPEG or JPEG2000, zip/flate -encoding is used. This choice is based on tests I did with a number of images. -I converted them into PDF using the lossless variants of the compression -formats offered by imagemagick. In all my tests, zip/flate encoding performed -best. You can verify my findings using the test_comp.sh script with any input -image given as a commandline argument. 
If you find an input file that is -outperformed by another lossless compression method, contact me. - I have not yet figured out how to determine the colorspace of JPEG2000 files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with other colorspaces, you must explicitly specify it using the `--colorspace` @@ -114,19 +114,19 @@ with the following command: You can then install the package using: - $ pip install img2pdf + $ pip3 install img2pdf If you prefer to install from source code use: $ cd img2pdf/ - $ pip install . + $ pip3 install . To test the console script without installing the package on your system, use virtualenv: $ cd img2pdf/ $ virtualenv ve - $ ve/bin/pip install . + $ ve/bin/pip3 install . You can then test the converter using: @@ -135,7 +135,33 @@ You can then test the converter using: The package can also be used as a library: import img2pdf - pdf_bytes = img2pdf.convert('test.jpg') - file = open("name.pdf","wb") - file.write(pdf_bytes) + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...")) + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) diff --git a/setup.cfg b/setup.cfg index 8c9157d..9f88734 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -4,5 +4,4 @@ description-file = README.md [egg_info] tag_build = tag_date = 0 -tag_svn_revision = 0 diff --git a/setup.py b/setup.py index 874380c..56e9c4c 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,21 @@ +import sys from setuptools import setup -VERSION = "0.2.3" +PY3 = sys.version_info[0] >= 3 + +VERSION = "0.3.0" + +INSTALL_REQUIRES = ( + 'Pillow', +) + +TESTS_REQUIRE = ( + 'pdfrw', +) + +if not PY3: + INSTALL_REQUIRES += ('enum34',) + setup( name='img2pdf', @@ -17,9 +32,12 @@ setup( 'Intended Audience :: Other Audience', 'Environment :: Console', 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: Implementation :: CPython', + "Programming Language :: Python :: Implementation :: PyPy", 'License :: OSI Approved :: GNU Lesser General Public License v3 ' '(LGPLv3)', 'Natural Language :: English', @@ -32,9 +50,11 @@ setup( include_package_data=True, test_suite='tests.test_suite', zip_safe=True, - install_requires=( - 'Pillow', - ), + install_requires=INSTALL_REQUIRES, + tests_require=TESTS_REQUIRE, + extras_require={ + 'test': TESTS_REQUIRE, + }, entry_points=''' [console_scripts] img2pdf = img2pdf:main diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index 870fa2d..e3ecf4b 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,43 +1,46 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.3 +Version: 0.3.0 Summary: Convert images to PDF via direct JPEG inclusion. 
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.0 +Description-Content-Type: UNKNOWN Description: img2pdf ======= Losslessly convert raster images to PDF. The file size will not unnecessarily - increase. One major application would be a number of scans made in JPEG format - which should now become part of a single PDF document. Existing solutions - would either re-encode the input JPEG files (leading to quality loss) or store - them in the zip/flate format which results into the PDF becoming unnecessarily - large in terms of its file size. + increase. It can for example be used to create a PDF document from a number of + scans that are only available in JPEG format. Existing solutions would either + re-encode the input JPEG files (leading to quality loss) or store them in the + zip/flate format which results into the PDF becoming unnecessarily large in + terms of its file size. Background ---------- - Quality loss can be avoided when converting JPEG and JPEG2000 images to PDF by - embedding them without re-encoding. I wrote this piece of python code. - because I was missing a tool to do this automatically. Img2pdf basically just - wraps JPEG images into the PDF container as they are. + Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to + PDF by embedding them into the PDF without re-encoding them. This is what + img2pdf does. It thus treats the PDF format merely as a container format for + storing one or more JPEGs or PNGs without re-encoding the images themselves. - If you know an existing tool which allows one to embed JPEG and JPEG2000 images - into a PDF container without recompression, please contact me so that I can put - this code into the garbage bin. 
+ If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 + images into a PDF container without recompression, please contact me so that I + can put this code into the garbage bin. Functionality ------------- - This program will take a list of images and produce a PDF file with the images - embedded in it. JPEG and JPEG2000 images will be included without - recompression. Raster images in other formats will be included with zip/flate - encoding which usually leads to an increase in the resulting size because - formats like png compress better than PDF which just zip/flate compresses the - RGB data. As a result, this tool is able to losslessly wrap images into a PDF + This program will take a list of raster images and produce a PDF file with the + images embedded in it. PNG, JPEG and JPEG2000 images will be included without + recompression and the resulting PDF will only be slightly larger than the input + images due to the overhead of the PDF container. Raster images in other + formats (like gif or tif) will be included using the lossless zip/flate + encoding using the PNG Paeth predictor. + + As a result, this tool is able to losslessly wrap raster images into a PDF container with a quality to filesize ratio that is typically better (in case of JPEG and JPEG2000 images) or equal (in case of other formats) than that of existing tools. @@ -61,13 +64,17 @@ Description: img2pdf However, this approach will result in PDF files that are a few times larger than the input JPEG or JPEG2000 file. - img2pdf is able to losslessly embed JPEG and JPEG2000 files into a PDF + Furthermore, when converting PNG images, popular tools like imagemagick use + flate encoding without a predictor. This means, that image file size ends up + being several orders of magnitude larger then necessary. 
+ + img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF container without additional overhead (aside from the PDF structure itself), save other graphics formats using lossless zip compression, and produce multi-page PDF files when more than one input image is given. - Also, since JPEG and JPEG2000 images are not reencoded, conversion with img2pdf - is several times faster than with other tools. + Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with + img2pdf is several times faster than with other tools. Usage ----- @@ -76,7 +83,9 @@ Description: img2pdf descriptor. If no output file is specified with the `-o`/`--output` option, output will be - done to stdout. + done to stdout. A typical invocation is: + + img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: @@ -89,14 +98,6 @@ Description: img2pdf If you find a JPEG or JPEG2000 file that, when embedded cannot be read by the Adobe Acrobat Reader, please contact me. - For lossless conversion of formats other than JPEG or JPEG2000, zip/flate - encoding is used. This choice is based on tests I did with a number of images. - I converted them into PDF using the lossless variants of the compression - formats offered by imagemagick. In all my tests, zip/flate encoding performed - best. You can verify my findings using the test_comp.sh script with any input - image given as a commandline argument. If you find an input file that is - outperformed by another lossless compression method, contact me. - I have not yet figured out how to determine the colorspace of JPEG2000 files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with other colorspaces, you must explicitly specify it using the `--colorspace` @@ -123,19 +124,19 @@ Description: img2pdf You can then install the package using: - $ pip install img2pdf + $ pip3 install img2pdf If you prefer to install from source code use: $ cd img2pdf/ - $ pip install . 
+ $ pip3 install . To test the console script without installing the package on your system, use virtualenv: $ cd img2pdf/ $ virtualenv ve - $ ve/bin/pip install . + $ ve/bin/pip3 install . You can then test the converter using: @@ -144,10 +145,36 @@ Description: img2pdf The package can also be used as a library: import img2pdf - pdf_bytes = img2pdf.convert('test.jpg') - file = open("name.pdf","wb") - file.write(pdf_bytes) + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...") + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) Keywords: jpeg pdf converter Platform: UNKNOWN @@ -156,9 +183,12 @@ Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: License :: OSI Approved :: GNU Lesser 
General Public License v3 (LGPLv3) Classifier: Natural Language :: English Classifier: Operating System :: OS Independent diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt index add31f1..ae6e816 100644 --- a/src/img2pdf.egg-info/SOURCES.txt +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -1,3 +1,4 @@ +CHANGES.rst MANIFEST.in README.md setup.cfg @@ -15,11 +16,18 @@ src/img2pdf.egg-info/top_level.txt src/img2pdf.egg-info/zip-safe src/tests/__init__.py src/tests/input/CMYK.jpg +src/tests/input/CMYK.tif +src/tests/input/animation.gif +src/tests/input/gray.png src/tests/input/mono.png +src/tests/input/mono.tif src/tests/input/normal.jpg src/tests/input/normal.png src/tests/output/CMYK.jpg.pdf src/tests/output/CMYK.tif.pdf +src/tests/output/animation.gif.pdf +src/tests/output/gray.png.pdf src/tests/output/mono.png.pdf +src/tests/output/mono.tif.pdf src/tests/output/normal.jpg.pdf src/tests/output/normal.png.pdf \ No newline at end of file diff --git a/src/img2pdf.egg-info/requires.txt b/src/img2pdf.egg-info/requires.txt index 7e2fba5..3a24589 100644 --- a/src/img2pdf.egg-info/requires.txt +++ b/src/img2pdf.egg-info/requires.txt @@ -1 +1,4 @@ Pillow + +[test] +pdfrw diff --git a/src/img2pdf.py b/src/img2pdf.py index 20fe784..48ef964 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- # Copyright (C) 2012-2014 Johannes 'josch' Schauer # @@ -27,8 +28,11 @@ from jp2 import parsejp2 from enum import Enum from io import BytesIO import logging +import struct -__version__ = "0.2.3" +PY3 = sys.version_info[0] >= 3 + +__version__ = "0.3.0" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -58,7 +62,7 @@ PageOrientation = Enum('PageOrientation', 'portrait landscape') Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other') -ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 other') +ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG other') PageMode = 
Enum('PageMode', 'none outlines thumbs') @@ -268,17 +272,33 @@ class MyPdfWriter(): self.addobj(page) -class MyPdfString(): - @classmethod - def encode(cls, string): - try: - string = string.encode('ascii') - except UnicodeEncodeError: - string = b"\xfe\xff"+string.encode("utf-16-be") - string = string.replace(b'\\', b'\\\\') - string = string.replace(b'(', b'\\(') - string = string.replace(b')', b'\\)') - return b'(' + string + b')' +if PY3: + class MyPdfString(): + @classmethod + def encode(cls, string, hextype=False): + if hextype: + return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >' + else: + try: + string = string.encode('ascii') + except UnicodeEncodeError: + string = b"\xfe\xff"+string.encode("utf-16-be") + string = string.replace(b'\\', b'\\\\') + string = string.replace(b'(', b'\\(') + string = string.replace(b')', b'\\)') + return b'(' + string + b')' +else: + class MyPdfString(object): + @classmethod + def encode(cls, string, hextype=False): + if hextype: + return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >' + else: + # This mimics exactly what pdfrw does. 
+ string = string.replace(b'\\', b'\\\\') + string = string.replace(b'(', b'\\(') + string = string.replace(b')', b'\\)') + return b'(' + string + b')' class pdfdoc(object): @@ -354,14 +374,15 @@ class pdfdoc(object): def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, - pageheight): + pageheight, userunit=None, palette=None): if self.with_pdfrw: - from pdfrw import PdfDict, PdfName, PdfObject + from pdfrw import PdfDict, PdfName, PdfObject, PdfString from pdfrw.py23_diffs import convert_load else: PdfDict = MyPdfDict PdfName = MyPdfName PdfObject = MyPdfObject + PdfString = MyPdfString convert_load = my_convert_load if color == Colorspace['1'] or color == Colorspace.L: @@ -370,21 +391,24 @@ class pdfdoc(object): colorspace = PdfName.DeviceRGB elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']: colorspace = PdfName.DeviceCMYK + elif color == Colorspace.P: + if self.with_pdfrw: + raise Exception("pdfrw does not support hex strings for palette image input, re-run with --without-pdfrw") + colorspace = [ PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, PdfString.encode(palette, hextype=True)] else: raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) # either embed the whole jpeg or deflate the bitmap representation - logging.debug(imgformat) if imgformat is ImageFormat.JPEG: - ofilter = [PdfName.DCTDecode] + ofilter = PdfName.DCTDecode elif imgformat is ImageFormat.JPEG2000: - ofilter = [PdfName.JPXDecode] + ofilter = PdfName.JPXDecode self.writer.version = "1.5" # jpeg2000 needs pdf 1.5 elif imgformat is ImageFormat.CCITTGroup4: ofilter = [PdfName.CCITTFaxDecode] else: - ofilter = [PdfName.FlateDecode] + ofilter = PdfName.FlateDecode image = PdfDict(stream=convert_load(imgdata)) @@ -398,7 +422,17 @@ class pdfdoc(object): if imgformat is ImageFormat.CCITTGroup4: image[PdfName.BitsPerComponent] = 1 else: - image[PdfName.BitsPerComponent] = 8 + if color 
== Colorspace['1']: + image[PdfName.BitsPerComponent] = 1 + elif color == Colorspace.P: + if len(palette) <= 2**1: + image[PdfName.BitsPerComponent] = 1 + elif len(palette) <= 2**4: + image[PdfName.BitsPerComponent] = 4 + else: + image[PdfName.BitsPerComponent] = 8 + else: + image[PdfName.BitsPerComponent] = 8 if color == Colorspace['CMYK;I']: # Inverts all four channels @@ -411,6 +445,26 @@ class pdfdoc(object): decodeparms[PdfName.Columns] = imgwidthpx decodeparms[PdfName.Rows] = imgheightpx image[PdfName.DecodeParms] = [decodeparms] + elif imgformat is ImageFormat.PNG: + decodeparms = PdfDict() + decodeparms[PdfName.Predictor] = 15 + if color in [ Colorspace.P, Colorspace['1'], Colorspace.L ]: + decodeparms[PdfName.Colors] = 1 + else: + decodeparms[PdfName.Colors] = 3 + decodeparms[PdfName.Columns] = imgwidthpx + if color == Colorspace['1']: + decodeparms[PdfName.BitsPerComponent] = 1 + elif color == Colorspace.P: + if len(palette) <= 2**1: + decodeparms[PdfName.BitsPerComponent] = 1 + elif len(palette) <= 2**4: + decodeparms[PdfName.BitsPerComponent] = 4 + else: + decodeparms[PdfName.BitsPerComponent] = 8 + else: + decodeparms[PdfName.BitsPerComponent] = 8 + image[PdfName.DecodeParms] = decodeparms text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii") @@ -423,6 +477,11 @@ class pdfdoc(object): page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight] page[PdfName.Resources] = resources page[PdfName.Contents] = content + if userunit is not None: + # /UserUnit requires PDF 1.6 + if self.writer.version < '1.6': + self.writer.version = '1.6' + page[PdfName.UserUnit] = userunit self.writer.addpage(page) @@ -582,6 +641,21 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) ics = imgdata.mode + if ics in ["LA", "PA", "RGBA"]: + logging.warning("Image contains transparency which cannot be retained in PDF.") + 
logging.warning("img2pdf will not perform a lossy operation.") + logging.warning("You can remove the alpha channel using imagemagick:") + logging.warning(" $ convert input.png -background white -alpha remove -alpha off output.png") + raise Exception("Refusing to work on images with alpha channel") + + + # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0 + # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG + # header. In that case it can happen that the horizontal and vertical DPI + # are set to zero. + if ndpi == (0, 0): + ndpi = (default_dpi, default_dpi) + logging.debug("input dpi = %d x %d", *ndpi) if colorspace: @@ -621,7 +695,13 @@ def transcode_monochrome(imgdata): # Convert the image to Group 4 in memory. If libtiff is not installed and # Pillow is not compiled against it, .save() will raise an exception. newimgio = BytesIO() - imgdata.save(newimgio, format='TIFF', compression='group4') + + # we create a whole new PIL image or otherwise it might happen with some + # input images, that libtiff fails an assert and the whole process is + # killed by a SIGABRT: + # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46 + im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes()) + im.save(newimgio, format='TIFF', compression='group4') # Open new image in memory newimgio.seek(0) @@ -649,6 +729,25 @@ def transcode_monochrome(imgdata): return ccittdata +def parse_png(rawdata): + pngidat = b"" + palette = [] + i = 16 + while i < len(rawdata): + # once we can require Python >= 3.2 we can use int.from_bytes() instead + n, = struct.unpack('>I', rawdata[i-8:i-4]) + if i + n > len(rawdata): + raise Exception("invalid png: %d %d %d"%(i, n, len(rawdata))) + if rawdata[i-4:i] == b"IDAT": + pngidat += rawdata[i:i+n] + elif rawdata[i-4:i] == b"PLTE": + for j in range(i, i+n, 3): + # with int.from_bytes() we would not have to prepend extra zeroes + color, = struct.unpack('>I', b'\x00'+rawdata[j:j+3]) + 
palette.append(color) + i += n + i += 12 + return pngidat, palette def read_images(rawdata, colorspace, first_frame_only=False): im = BytesIO(rawdata) @@ -658,7 +757,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): imgdata = Image.open(im) except IOError as e: # test if it is a jpeg2000 image - if rawdata[:12] != "\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": + if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": raise ImageOpenError("cannot read input image (not jpeg2000). " "PIL: error reading image: %s" % e) # image is jpeg2000 @@ -675,6 +774,8 @@ def read_images(rawdata, colorspace, first_frame_only=False): # depending on the input format, determine whether to pass the raw # image or the zlib compressed color information + + # JPEG and JPEG2000 can be embedded into the PDF as-is if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace, rawdata) @@ -685,81 +786,106 @@ def read_images(rawdata, colorspace, first_frame_only=False): if color == Colorspace['RGBA']: raise JpegColorspaceError("jpeg can't have an alpha channel") im.close() - return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx)] - else: - result = [] - img_page_count = 0 - # loop through all frames of the image (example: multipage TIFF) - while True: - try: - imgdata.seek(img_page_count) - except EOFError: - break + return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])] + + # We can directly embed the IDAT chunk of PNG images if the PNG is not + # interlaced + # + # PIL does not provide the information whether a PNG was stored interlaced + # or not. Thus, we retrieve that info manually by looking at byte 13 in the + # IHDR chunk. We know where to find that in the file because the IHDR chunk + # must be the first chunk. 
+ if imgformat == ImageFormat.PNG and rawdata[28] == 0: + color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata) + pngidat, palette = parse_png(rawdata) + return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)] - if first_frame_only and img_page_count > 0: - break + # Everything else has to be encoded - logging.debug("Converting frame: %d" % img_page_count) + result = [] + img_page_count = 0 + # loop through all frames of the image (example: multipage TIFF) + while True: + try: + imgdata.seek(img_page_count) + except EOFError: + break - color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace) + if first_frame_only and img_page_count > 0: + break - newimg = None - if color == Colorspace['1']: - try: - ccittdata = transcode_monochrome(imgdata) - imgformat = ImageFormat.CCITTGroup4 - result.append((color, ndpi, imgformat, ccittdata, - imgwidthpx, imgheightpx)) - img_page_count += 1 - continue - except Exception as e: - logging.debug(e) - logging.debug("Converting colorspace 1 to L") - newimg = imgdata.convert('L') - color = Colorspace.L - elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, - Colorspace["CMYK;I"]]: - logging.debug("Colorspace is OK: %s", color) - newimg = imgdata - elif color in [Colorspace.RGBA, Colorspace.P, Colorspace.other]: - logging.debug("Converting colorspace %s to RGB", color) - newimg = imgdata.convert('RGB') - color = Colorspace.RGB - else: - raise ValueError("unknown colorspace: %s" % color.name) + logging.debug("Converting frame: %d" % img_page_count) + + color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace) + + newimg = None + if color == Colorspace['1']: + try: + ccittdata = transcode_monochrome(imgdata) + imgformat = ImageFormat.CCITTGroup4 + result.append((color, ndpi, imgformat, ccittdata, + imgwidthpx, imgheightpx, [])) + img_page_count += 1 + continue + 
except Exception as e: + logging.debug(e) + logging.debug("Converting colorspace 1 to L") + newimg = imgdata.convert('L') + color = Colorspace.L + elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, + Colorspace["CMYK;I"], Colorspace.P]: + logging.debug("Colorspace is OK: %s", color) + newimg = imgdata + else: + raise ValueError("unknown or unsupported colorspace: %s" % color.name) + # the PNG format does not support CMYK, so we fall back to normal + # compression + if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: imggz = zlib.compress(newimg.tobytes()) result.append((color, ndpi, imgformat, imggz, imgwidthpx, - imgheightpx)) - img_page_count += 1 - # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the - # close() method - try: - imgdata.close() - except AttributeError: - pass - im.close() - return result + imgheightpx, [])) + else: + # cheapo version to retrieve a PNG encoding of the payload is to + # just save it with PIL. In the future this could be replaced by + # dedicated function applying the Paeth PNG filter to the raw pixel + pngbuffer = BytesIO() + newimg.save(pngbuffer, format="png") + pngidat, palette = parse_png(pngbuffer.getvalue()) + imgformat = ImageFormat.PNG + result.append((color, ndpi, imgformat, pngidat, imgwidthpx, + imgheightpx, palette)) + img_page_count += 1 + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the + # close() method + try: + imgdata.close() + except AttributeError: + pass + im.close() + return result # converts a length in pixels to a length in PDF units (1/72 of an inch) def px_to_pt(length, dpi): - return 72*length/dpi + return 72.0*length/dpi def cm_to_pt(length): - return (72*length)/2.54 + return (72.0*length)/2.54 def mm_to_pt(length): - return (72*length)/25.4 + return (72.0*length)/25.4 def in_to_pt(length): - return 72*length + return 72.0*length -def get_layout_fun(pagesize, imgsize, border, fit, auto_orient): +def get_layout_fun(pagesize=None, imgsize=None, border=None, 
fit=None, + auto_orient=False): def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight): if fitwidth is None and fitheight is None: raise ValueError("fitwidth and fitheight cannot both be None") @@ -970,6 +1096,17 @@ def get_fixed_dpi_layout_fun(fixed_dpi): return fixed_dpi_layout_fun +def find_scale(pagewidth, pageheight): + """Find the power of 10 (10, 100, 1000...) that will reduce the scale + below the PDF specification limit of 14400 PDF units (=200 inches)""" + from math import log10, ceil + + major = max(pagewidth, pageheight) + oversized = major / 14400.0 + + return 10 ** ceil(log10(oversized)) + + # given one or more input image, depending on outputstream, either return a # string containing the whole PDF if outputstream is None or write the PDF # data to the given file-like object and return None @@ -977,20 +1114,31 @@ def get_fixed_dpi_layout_fun(fixed_dpi): # Input images can be given as file like objects (they must implement read()), # as a binary string representing the image content or as filenames to the # images. 
-def convert(*images, title=None, - author=None, creator=None, producer=None, creationdate=None, - moddate=None, subject=None, keywords=None, colorspace=None, - nodate=False, layout_fun=default_layout_fun, viewer_panes=None, - viewer_initial_page=None, viewer_magnification=None, - viewer_page_layout=None, viewer_fit_window=False, - viewer_center_window=False, viewer_fullscreen=False, - with_pdfrw=True, outputstream=None, first_frame_only=False): - - pdf = pdfdoc("1.3", title, author, creator, producer, creationdate, - moddate, subject, keywords, nodate, viewer_panes, - viewer_initial_page, viewer_magnification, viewer_page_layout, - viewer_fit_window, viewer_center_window, viewer_fullscreen, - with_pdfrw) +def convert(*images, **kwargs): + + _default_kwargs = dict( + title=None, + author=None, creator=None, producer=None, creationdate=None, + moddate=None, subject=None, keywords=None, colorspace=None, + nodate=False, layout_fun=default_layout_fun, viewer_panes=None, + viewer_initial_page=None, viewer_magnification=None, + viewer_page_layout=None, viewer_fit_window=False, + viewer_center_window=False, viewer_fullscreen=False, + with_pdfrw=True, outputstream=None, first_frame_only=False, + allow_oversized=True) + for kwname, default in _default_kwargs.items(): + if kwname not in kwargs: + kwargs[kwname] = default + + pdf = pdfdoc( + "1.3", + kwargs['title'], kwargs['author'], kwargs['creator'], + kwargs['producer'], kwargs['creationdate'], kwargs['moddate'], + kwargs['subject'], kwargs['keywords'], kwargs['nodate'], + kwargs['viewer_panes'], kwargs['viewer_initial_page'], + kwargs['viewer_magnification'], kwargs['viewer_page_layout'], + kwargs['viewer_fit_window'], kwargs['viewer_center_window'], + kwargs['viewer_fullscreen'], kwargs['with_pdfrw']) # backwards compatibility with older img2pdf versions where the first # argument to the function had to be given as a list @@ -999,6 +1147,9 @@ def convert(*images, title=None, if isinstance(images[0], (list, tuple)): 
images = images[0] + if not isinstance(images, (list, tuple)): + images = [images] + for img in images: # img is allowed to be a path, a binary string representing image data # or a file-like object (really anything that implements read()) @@ -1019,25 +1170,35 @@ def convert(*images, title=None, # name so we now try treating it as raw image content rawdata = img - for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx \ - in read_images(rawdata, colorspace, first_frame_only): + for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, palette \ + in read_images( + rawdata, kwargs['colorspace'], kwargs['first_frame_only']): pagewidth, pageheight, imgwidthpdf, imgheightpdf = \ - layout_fun(imgwidthpx, imgheightpx, ndpi) + kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi) + + userunit = None if pagewidth < 3.00 or pageheight < 3.00: logging.warning("pdf width or height is below 3.00 - too " "small for some viewers!") elif pagewidth > 14400.0 or pageheight > 14400.0: - raise PdfTooLargeError( + if kwargs['allow_oversized']: + userunit = find_scale(pagewidth, pageheight) + pagewidth /= userunit + pageheight /= userunit + imgwidthpdf /= userunit + imgheightpdf /= userunit + else: + raise PdfTooLargeError( "pdf width or height must not exceed 200 inches.") # the image is always centered on the page imgxpdf = (pagewidth - imgwidthpdf)/2.0 imgypdf = (pageheight - imgheightpdf)/2.0 pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, - imgypdf, pagewidth, pageheight) + imgypdf, pagewidth, pageheight, userunit, palette) - if outputstream: - pdf.tostream(outputstream) + if kwargs['outputstream']: + pdf.tostream(kwargs['outputstream']) return return pdf.tostring() @@ -1318,15 +1479,13 @@ def main(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description='''\ -Losslessly convert raster images to PDF without re-encoding JPEG and JPEG2000 -images. 
This leads to a lossless conversion of JPEG and JPEG2000 images with -the only added file size coming from the PDF container itself. - -Other raster graphics formats are losslessly stored in a zip/flate encoding of -their RGB representation. This might increase file size and does not store -transparency. There is nothing that can be done about that until the PDF format -allows embedding other image formats like PNG. Thus, img2pdf is primarily -useful to convert JPEG and JPEG2000 images to PDF. +Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and +JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000 +images with the only added file size coming from the PDF container itself. +Other raster graphics formats are losslessly stored using the same encoding +that PNG uses. Since PDF does not support images with transparency and since +img2pdf aims to never be lossy, input images with an alpha channel are not +supported. The output is sent to standard output so that it can be redirected into a file or to another program as part of a shell pipe. To directly write the output @@ -1501,6 +1660,15 @@ RGB.''') "input image be converted into a page in the resulting PDF." ) + outargs.add_argument( + "--pillow-limit-break", action="store_true", + help="img2pdf uses the Python Imaging Library Pillow to read input " + "images. Pillow limits the maximum input image size to %d pixels " + "to prevent decompression bomb denial of service attacks. If " + "your input image contains more pixels than that, use this " + "option to disable this safety measure during this run of img2pdf" + %Image.MAX_IMAGE_PIXELS) + sizeargs = parser.add_argument_group( title='Image and page size and layout arguments', description='''\ @@ -1674,6 +1842,9 @@ values set via the --border option. 
if args.verbose: logging.basicConfig(level=logging.DEBUG) + if args.pillow_limit_break: + Image.MAX_IMAGE_PIXELS = None + layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient) diff --git a/src/tests/__init__.py b/src/tests/__init__.py index 506fc48..b1c1797 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -1,14 +1,23 @@ import unittest -import os import img2pdf +import os +import struct +import sys import zlib from PIL import Image -from io import BytesIO -import struct +from io import StringIO, BytesIO HERE = os.path.dirname(__file__) +PY3 = sys.version_info[0] >= 3 + +if PY3: + PdfReaderIO = StringIO +else: + PdfReaderIO = BytesIO + + # convert +set date:create +set date:modify -define png:exclude-chunk=time # we define some variables so that the table below can be narrower @@ -17,6 +26,7 @@ psp = (504, 972) # --pagesize portrait isl = (756, 324) # --imgsize landscape isp = (324, 756) # --imgsize portrait border = (162, 270) # --border +poster = (97200, 50400) # there is no need to have test cases with the same images with inverted # orientation (landscape/portrait) because --pagesize and --imgsize are # already inverted @@ -395,6 +405,8 @@ layout_test_cases = [ (972, 504), (864, 432)), (psl, isl, border, f_enlarge, 1, (972, 504), (756, 252), # 179 (972, 504), (864, 432)), + (poster, None, None, f_fill, 0, (97200, 50400), (151200, 50400), + (97200, 50400), (100800, 50400)), ] @@ -459,6 +471,10 @@ def test_suite(): files = os.listdir(os.path.join(HERE, "input")) for with_pdfrw, test_name in [(a, b) for a in [True, False] for b in files]: + # we do not test animation.gif with pdfrw because it doesn't support + # saving hexadecimal palette data + if test_name == 'animation.gif' and with_pdfrw: + continue inputf = os.path.join(HERE, "input", test_name) if not os.path.isfile(inputf): continue @@ -470,107 +486,142 @@ def test_suite(): orig_imgdata = inf.read() output = img2pdf.convert(orig_imgdata, 
nodate=True, with_pdfrw=with_pdfrw) - from io import StringIO, BytesIO from pdfrw import PdfReader, PdfName, PdfWriter from pdfrw.py23_diffs import convert_load, convert_store - x = PdfReader(StringIO(convert_load(output))) + x = PdfReader(PdfReaderIO(convert_load(output))) self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, PdfName.Size]) - self.assertEqual(x.Size, '7') + self.assertIn(x.Root.Pages.Count, ('1', '2')) + if len(x.Root.Pages.Kids) == '1': + self.assertEqual(x.Size, '7') + self.assertEqual(len(x.Root.Pages.Kids), 1) + elif len(x.Root.Pages.Kids) == '2': + self.assertEqual(x.Size, '10') + self.assertEqual(len(x.Root.Pages.Kids), 2) self.assertEqual(x.Info, {}) self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, PdfName.Type]) self.assertEqual(x.Root.Type, PdfName.Catalog) self.assertEqual(sorted(x.Root.Pages.keys()), [PdfName.Count, PdfName.Kids, PdfName.Type]) - self.assertEqual(x.Root.Pages.Count, '1') self.assertEqual(x.Root.Pages.Type, PdfName.Pages) - self.assertEqual(len(x.Root.Pages.Kids), 1) - self.assertEqual(sorted(x.Root.Pages.Kids[0].keys()), - [PdfName.Contents, PdfName.MediaBox, - PdfName.Parent, PdfName.Resources, PdfName.Type]) - self.assertEqual(x.Root.Pages.Kids[0].MediaBox, - ['0', '0', '115', '48']) - self.assertEqual(x.Root.Pages.Kids[0].Parent, x.Root.Pages) - self.assertEqual(x.Root.Pages.Kids[0].Type, PdfName.Page) - self.assertEqual(x.Root.Pages.Kids[0].Resources.keys(), - [PdfName.XObject]) - self.assertEqual(x.Root.Pages.Kids[0].Resources.XObject.keys(), - [PdfName.Im0]) - self.assertEqual(x.Root.Pages.Kids[0].Contents.keys(), - [PdfName.Length]) - self.assertEqual(x.Root.Pages.Kids[0].Contents.Length, - str(len(x.Root.Pages.Kids[0].Contents.stream))) - self.assertEqual(x.Root.Pages.Kids[0].Contents.stream, - "q\n115.0000 0 0 48.0000 0.0000 0.0000 cm\n/Im0 " - "Do\nQ") + orig_img = Image.open(f) + for pagenum in range(len(x.Root.Pages.Kids)): + # retrieve the original image frame that this page was + # 
generated from + orig_img.seek(pagenum) + cur_page = x.Root.Pages.Kids[pagenum] - imgprops = x.Root.Pages.Kids[0].Resources.XObject.Im0 + ndpi = orig_img.info.get("dpi", (96.0, 96.0)) + # In python3, the returned dpi value for some tiff images will + # not be an integer but a float. To make the behaviour of + # img2pdf the same between python2 and python3, we convert that + # float into an integer by rounding. + # Search online for the 72.009 dpi problem for more info. + ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) + imgwidthpx, imgheightpx = orig_img.size + pagewidth = 72.0*imgwidthpx/ndpi[0] + pageheight = 72.0*imgheightpx/ndpi[1] - # test if the filter is valid: - self.assertIn( - imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], - [PdfName.FlateDecode], - [PdfName.CCITTFaxDecode]]) - # test if the colorspace is valid - self.assertIn( - imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, - PdfName.DeviceCMYK]) - # test if the image has correct size - orig_img = Image.open(f) - self.assertEqual(imgprops.Width, str(orig_img.size[0])) - self.assertEqual(imgprops.Height, str(orig_img.size[1])) - # if the input file is a jpeg then it should've been copied - # verbatim into the PDF - if imgprops.Filter in [[PdfName.DCTDecode], [PdfName.JPXDecode]]: - self.assertEqual( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, - convert_load(orig_imgdata)) - elif imgprops.Filter == [PdfName.CCITTFaxDecode]: - tiff_header = tiff_header_for_ccitt( - int(imgprops.Width), int(imgprops.Height), - int(imgprops.Length), 4) - imgio = BytesIO() - imgio.write(tiff_header) - imgio.write(convert_store( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) - imgio.seek(0) - im = Image.open(imgio) - self.assertEqual(im.tobytes(), orig_img.tobytes()) - try: - im.close() - except AttributeError: - pass + def format_float(f): + if int(f) == f: + return str(int(f)) + else: + return ("%.4f" % f).rstrip("0") + + self.assertEqual(sorted(cur_page.keys()), + 
[PdfName.Contents, PdfName.MediaBox, + PdfName.Parent, PdfName.Resources, + PdfName.Type]) + self.assertEqual(cur_page.MediaBox, + ['0', '0', format_float(pagewidth), + format_float(pageheight)]) + self.assertEqual(cur_page.Parent, x.Root.Pages) + self.assertEqual(cur_page.Type, PdfName.Page) + self.assertEqual(cur_page.Resources.keys(), + [PdfName.XObject]) + self.assertEqual(cur_page.Resources.XObject.keys(), + [PdfName.Im0]) + self.assertEqual(cur_page.Contents.keys(), + [PdfName.Length]) + self.assertEqual(cur_page.Contents.Length, + str(len(cur_page.Contents.stream))) + self.assertEqual(cur_page.Contents.stream, + "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n" + "/Im0 Do\nQ" % (pagewidth, pageheight)) + + imgprops = cur_page.Resources.XObject.Im0 + + # test if the filter is valid: + self.assertIn( + imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode, + PdfName.FlateDecode, + [PdfName.CCITTFaxDecode]]) + + # test if the image has correct size + self.assertEqual(imgprops.Width, str(orig_img.size[0])) + self.assertEqual(imgprops.Height, str(orig_img.size[1])) + # if the input file is a jpeg then it should've been copied + # verbatim into the PDF + if imgprops.Filter in [PdfName.DCTDecode, + PdfName.JPXDecode]: + self.assertEqual( + cur_page.Resources.XObject.Im0.stream, + convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.CCITTFaxDecode]: + tiff_header = tiff_header_for_ccitt( + int(imgprops.Width), int(imgprops.Height), + int(imgprops.Length), 4) + imgio = BytesIO() + imgio.write(tiff_header) + imgio.write(convert_store( + cur_page.Resources.XObject.Im0.stream)) + imgio.seek(0) + im = Image.open(imgio) + self.assertEqual(im.tobytes(), orig_img.tobytes()) + try: + im.close() + except AttributeError: + pass - elif imgprops.Filter == [PdfName.FlateDecode]: - # otherwise, the data is flate encoded and has to be equal to - # the pixel data of the input image - imgdata = zlib.decompress( - convert_store( - x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) - 
colorspace = imgprops.ColorSpace - if colorspace == PdfName.DeviceGray: - colorspace = 'L' - elif colorspace == PdfName.DeviceRGB: - colorspace = 'RGB' - elif colorspace == PdfName.DeviceCMYK: - colorspace = 'CMYK' - else: - raise Exception("invalid colorspace") - im = Image.frombytes(colorspace, (int(imgprops.Width), - int(imgprops.Height)), - imgdata) - if orig_img.mode == '1': - orig_img = orig_img.convert("L") - elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): - orig_img = orig_img.convert("RGB") - self.assertEqual(im.tobytes(), orig_img.tobytes()) - # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have - # the close() method - try: - im.close() - except AttributeError: - pass + elif imgprops.Filter == PdfName.FlateDecode: + # otherwise, the data is flate encoded and has to be equal + # to the pixel data of the input image + imgdata = zlib.decompress( + convert_store(cur_page.Resources.XObject.Im0.stream)) + if imgprops.DecodeParms: + if orig_img.format == 'PNG': + pngidat, palette = img2pdf.parse_png(orig_imgdata) + else: + pngbuffer = BytesIO() + orig_img.save(pngbuffer, format="png") + pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue()) + self.assertEqual(zlib.decompress(pngidat), imgdata) + else: + colorspace = imgprops.ColorSpace + if colorspace == PdfName.DeviceGray: + colorspace = 'L' + elif colorspace == PdfName.DeviceRGB: + colorspace = 'RGB' + elif colorspace == PdfName.DeviceCMYK: + colorspace = 'CMYK' + else: + raise Exception("invalid colorspace") + im = Image.frombytes(colorspace, (int(imgprops.Width), + int(imgprops.Height)), + imgdata) + if orig_img.mode == '1': + self.assertEqual(im.tobytes(), + orig_img.convert("L").tobytes()) + elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): + self.assertEqual(im.tobytes(), + orig_img.convert("RGB").tobytes()) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not + # have the close() method + try: + im.close() + except AttributeError: + pass # now use pdfrw to parse 
and then write out both pdfs and check the # result for equality y = PdfReader(out) diff --git a/src/tests/input/CMYK.tif b/src/tests/input/CMYK.tif new file mode 100644 index 0000000..8e3803e Binary files /dev/null and b/src/tests/input/CMYK.tif differ diff --git a/src/tests/input/animation.gif b/src/tests/input/animation.gif new file mode 100644 index 0000000..af4b278 Binary files /dev/null and b/src/tests/input/animation.gif differ diff --git a/src/tests/input/gray.png b/src/tests/input/gray.png new file mode 100644 index 0000000..48247fd Binary files /dev/null and b/src/tests/input/gray.png differ diff --git a/src/tests/input/mono.tif b/src/tests/input/mono.tif new file mode 100644 index 0000000..53e85bc Binary files /dev/null and b/src/tests/input/mono.tif differ diff --git a/src/tests/input/normal.png b/src/tests/input/normal.png index 87b9a6e..394f965 100644 Binary files a/src/tests/input/normal.png and b/src/tests/input/normal.png differ diff --git a/src/tests/output/CMYK.jpg.pdf b/src/tests/output/CMYK.jpg.pdf index bfe67f3..9efbe16 100644 Binary files a/src/tests/output/CMYK.jpg.pdf and b/src/tests/output/CMYK.jpg.pdf differ diff --git a/src/tests/output/CMYK.tif.pdf b/src/tests/output/CMYK.tif.pdf index b00586b..242bac7 100644 Binary files a/src/tests/output/CMYK.tif.pdf and b/src/tests/output/CMYK.tif.pdf differ diff --git a/src/tests/output/animation.gif.pdf b/src/tests/output/animation.gif.pdf new file mode 100644 index 0000000..fdfd460 Binary files /dev/null and b/src/tests/output/animation.gif.pdf differ diff --git a/src/tests/output/gray.png.pdf b/src/tests/output/gray.png.pdf new file mode 100644 index 0000000..3f2d4c3 Binary files /dev/null and b/src/tests/output/gray.png.pdf differ diff --git a/src/tests/output/mono.png.pdf b/src/tests/output/mono.png.pdf index eda3ec7..c773715 100644 Binary files a/src/tests/output/mono.png.pdf and b/src/tests/output/mono.png.pdf differ diff --git a/src/tests/output/mono.tif.pdf b/src/tests/output/mono.tif.pdf 
new file mode 100644 index 0000000..d23e65e Binary files /dev/null and b/src/tests/output/mono.tif.pdf differ diff --git a/src/tests/output/normal.jpg.pdf b/src/tests/output/normal.jpg.pdf index 87d2645..7acbe20 100644 Binary files a/src/tests/output/normal.jpg.pdf and b/src/tests/output/normal.jpg.pdf differ diff --git a/src/tests/output/normal.png.pdf b/src/tests/output/normal.png.pdf index 2628c5d..971475f 100644 Binary files a/src/tests/output/normal.png.pdf and b/src/tests/output/normal.png.pdf differ -- cgit v1.2.3 From 87798e164bdb83d287a1e4511f2a22648dd02826 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:23:10 +0200 Subject: debian/control: fix my name --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 466cb98..1eddc6a 100644 --- a/debian/control +++ b/debian/control @@ -1,5 +1,5 @@ Source: img2pdf -Maintainer: Johannes Schauer +Maintainer: Johannes 'josch' Schauer Section: python Priority: optional Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 10), python3-pil, python3-pdfrw, help2man -- cgit v1.2.3 From c6ef5e1b6c85d6409d20610403b9dcb42a051af8 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:23:55 +0200 Subject: debian/changelog: new entry for 0.3.0-1 --- debian/changelog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/debian/changelog b/debian/changelog index 003c69d..1eebb3a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +img2pdf (0.3.0-1) UNRELEASED; urgency=medium + + * new upstream release + * + + -- Johannes 'josch' Schauer Fri, 20 Jul 2018 07:23:34 +0200 + img2pdf (0.2.3-1) unstable; urgency=medium * New upstream release -- cgit v1.2.3 From fdbcf0681754e7c3a815058c0e395a30707b7e1c Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:30:14 +0200 Subject: debian/copyright: use secure copyright format uri --- debian/copyright | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/copyright b/debian/copyright index 2458201..24b04a5 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,4 +1,4 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: img2pdf Source: https://gitlab.mister-muffin.de/josch/img2pdf -- cgit v1.2.3 From 338ce99ce59c9cdd0c0956a707347607bed2be8f Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:31:37 +0200 Subject: bump debhelper compat level to 11 --- debian/compat | 2 +- debian/control | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/compat b/debian/compat index f599e28..b4de394 100644 --- a/debian/compat +++ b/debian/compat @@ -1 +1 @@ -10 +11 diff --git a/debian/control b/debian/control index 1eddc6a..9bfbb78 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,7 @@ Source: img2pdf Maintainer: Johannes 'josch' Schauer Section: python Priority: optional -Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 10), python3-pil, python3-pdfrw, help2man +Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 11), python3-pil, python3-pdfrw, help2man Standards-Version: 3.9.8 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf -- cgit v1.2.3 From 362f1c103ecf795002f324dc7a5019f65a510c99 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:25:58 +0200 Subject: debian/tests/default: test all test input --- debian/tests/default | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/debian/tests/default b/debian/tests/default index 6230e1f..9a0f42d 100644 --- a/debian/tests/default +++ b/debian/tests/default @@ -1,7 +1,12 @@ #!/bin/sh set -exu -for f in CMYK.jpg mono.png normal.jpg normal.png; do - img2pdf --nodate --producer="" 
src/tests/input/$f -o $ADTTMP/$f.pdf - diff -u --text src/tests/output/$f.pdf $ADTTMP/$f.pdf +for f in src/tests/input/*; do + bn=$(basename "$f") + pdfrw= + if [ "${f%.gif}" != "$f" ]; then + pdfrw=--without-pdfrw + fi + img2pdf $pdfrw --nodate --producer="" "$f" -o "$ADTTMP/$bn.pdf" + diff -u --text "src/tests/output/$bn.pdf" "$ADTTMP/$bn.pdf" done -- cgit v1.2.3 From 50f547a66904444ae808489816080876844e3a72 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:42:01 +0200 Subject: debian/watch: use secure uri --- debian/watch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/watch b/debian/watch index d69d385..8c28300 100644 --- a/debian/watch +++ b/debian/watch @@ -1,4 +1,4 @@ # please also check http://pypi.debian.net/img2pdf/watch version=3 opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ -http://pypi.debian.net/img2pdf/img2pdf-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) +https://pypi.debian.net/img2pdf/img2pdf-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) -- cgit v1.2.3 From 66430d2f9445c70cd9541f82910fde2084bb3a97 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 20 Jul 2018 07:42:51 +0200 Subject: Update changelog for 0.3.0-1 release --- debian/changelog | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index 1eebb3a..537f2b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,12 @@ -img2pdf (0.3.0-1) UNRELEASED; urgency=medium +img2pdf (0.3.0-1) unstable; urgency=medium * new upstream release - * + * debian/copyright: use secure copyright format uri + * bump debhelper compat level to 11 + * debian/tests/default: test all test input + * debian/watch: use secure uri - -- Johannes 'josch' Schauer Fri, 20 Jul 2018 07:23:34 +0200 + -- Johannes 'josch' Schauer Fri, 20 Jul 2018 07:42:42 +0200 img2pdf (0.2.3-1) unstable; urgency=medium -- cgit v1.2.3 From aef245f415aae671df75502700826d2bb682e257 Mon Sep 17 00:00:00 2001 From: 
Johannes 'josch' Schauer Date: Sun, 5 Aug 2018 21:07:37 +0200 Subject: debian/watch: check signature --- debian/upstream/signing-key.asc | 194 ++++++++++++++++++++++++++++++++++++++++ debian/watch | 2 +- 2 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 debian/upstream/signing-key.asc diff --git a/debian/upstream/signing-key.asc b/debian/upstream/signing-key.asc new file mode 100644 index 0000000..018940f --- /dev/null +++ b/debian/upstream/signing-key.asc @@ -0,0 +1,194 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBFHVWP0BEAC4vnKRkDgoQ4JrRHhDrKipbs4I0xwRSDHlhnD1bsa12PNaJytH +HUufulM5woChwGPFOH0Ex0eOzFWzQ1cHmijIIdm5h9tGSxQK+AF5lh2q9/ae1SXW +bh9u+6u8PWS1P9nxXMCN9c4ahwUb5YYCH2ThkmlhzvAeX0/hk85zecglsypUfQgO +9tp72S8CX/Lx0HX0at7xEioKgA39/ZWD4b7FktI3MX+UYMgOXsgsWqmY2gMGUp3E +3Aa6se6/63nhY3HLCCHUYS3pxP7Cnw5fI3/KJ9yBSGQ8LoNwijJtJD0XWTaUikKy ++MrifZDpfFIxvo/JJJLOXTi7nEnXZitKV5pz49/6CkhbSdAt423honj+Gn58viUw +pyMjpfCaZu4/RN8GLDMvlz2etst0HHnINIwQWPrXLubF3jqe8uhseKATO7FkgaOw +4o6xC+NZuycT7pXsb6m51y/TZfAq/eTP0TE8jMSVf1dpMoyLOcI4VciL26G7uujQ +qjdBVIgcPnv7XG9y+HqHX49pvTRo0Sum21LpbZRDCeRtYe8flbBvHzM+B+S93xSn +uppct4BFrKJ2RU7QCIpDOBvfP24cy9Nu+AphScXw5FtQOzKfz3+kosPz3uu0XCUt +xX7h8s00dT4hQQX/bMwjqa2WJNnoaqg5oIPMRNkZHPuPNcsdobSy3KJuWwARAQAB +tClKb2hhbm5lcyBTY2hhdWVyIDxqb3NjaEBtaXN0ZXItbXVmZmluLmRlPokCOgQT +AQgAJAIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAUCUdVbIQIZAQAKCRB9XYxg +z00+tHLgD/4xU2nGfP187CQm4d4cZG7maaLSWeyffY2UMgBYwppioIv99hPWFV8j +ePsBsAYGXH0TWaci+mLAiYveirGFnl4atwc9+YvWE5J1zot6nE6cFdv7YM6X4WaO +/5zjkI0uXXOckfDrT8HbTDhQvyrkLL3NB728lHkPZrTFxrmCJkWZb9zU2X4Mbp/M +Rhei1OWZu3FiOxc7or5dbfWL+t8XEmQ/CrO96f2rHOjcuk+o//4xcm1uDFbKS385 +OjtE6rcDowWvDGvd1IYIyFdXinCX0vZcTSrsgUknMHpOb//ucKiSC9tjNQlqXyAx +yok7KAyJrXRflY0ja2EEVywfWaw99H5o5tQq5utNnG/XitS98qEk4w8P6OC4FInr +uNgxyTcBFBAE/M2HnRCRW+1Tui1KB0gKAY09NhxX++6EbaetXqjiHBLnNBcOaSWZ +FMjzo/qn7YqRljAlLwHACK03J8yvbcIfFW4uTBpmE2+Y8vR+JaegW2+Xh94OVPeH +Q8y8GRkCwkZxBxNIZGjjm9MWuccGHxQtzU6balfYyAgoC1HvnSw8parqRW6kkl7h 
+TBJVcbJkJ37XRNVtRGPmdg98xrAk2MUZOFMOKBXFQW4e3a3KNRO8zQh7x/tmfG/R +ti3yIjpHLhGrkwaMOUdAz/Fh43ZNcR8mI2PIF7Aj4eA+/B/dyua9DYkCNwQTAQgA +IQUCUdVY/QIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRB9XYxgz00+tAWv +D/0Xv1rHwFJuzEQ02HUqGhHSqZVaDIrq/vxLMPKaGcUwrtGvgV79ijprUA6RlSiv +FSrcPglloPUdtR+csZ25oMYBSRDVd1B5+qqwyjYkgDwNA4/Cu+QlHevPCR8fGXnQ +kg97lQ6YKNdl1K1qm6rZzwUWobvVQK8hUF97orJzhgVU7/AA1B0lbVQyWJ7Odm+H +uBsijwCFpTDfVX3xCEdtf9AWMVD13D2vaqhmoobzyciE5cJHmVQC6CSfYs31gkIn +mVvNC1SN8/qvSxY/GvHCBV1suYhJn5Ls9LhaXR09m82sJsVRL5qEvMscBcpcDAdK +VQehl5i19mvDSNG9btwZ2E80mH57qXAW2K+h9n+5UhZk80hAsI25de+jy/Kp20L1 +nzMH/j5mEpHKMMWb1k5pZBho9Bskjbg/R9TQzfqUbAJUX/IvsruB+z/KD/CT4X2i +BFVjbnPk3E8T399rxKEfaqBqLAATjbP/VQMr3eZqs0roL6roPtVNf80wKjHk+KDA +yMBVD7caeyHrAAxAA4nIeFd9nfP3SN8MnnxEugceaAiLxyCcIfrteyftBddzOUGf +pej8LYYkrlbQuRv4+BXDya55vanBegc3bBWA4Ft0DE9QNnvRN9Tw+vvlRTrvEmqe +G/iKJYAu0jaVwq92JLSIA8gMoaP5B4LsC/4SfpIWipE7J4hGBBARAgAGBQJSFIGc +AAoJEIZFRLbFS9eYSrAAnRJc1hGFTJ0c+fPIsRHEyEYq1gu2AKCd3P6ML1JSrdjA +dnvCJrq19ygweYkCHAQQAQIABgUCUhSBugAKCRAtGqrPJEREQsn4EACLbUk7HVxk +MvXN9oluiIvery6dOMwCZnbjsuNLxQ0TtmPPIDFfvUp07TvNLP+/Uw4/KACHxv34 ++fyHJF/X6d12t2XzXtBC1ssbHvoK1oB4gnctJxYi6LgQcgxZPFRzpRC9AzvVuoyX +5sAZIq9QsGslZW5XRSS7A+ahfmrE7G87UD9DKNY5GGOWWaYbN5bqW/wDiuzxnzGB +uzY2Tb6Mq1jraZd2GwuaeM0i50hH65oueKVauR9CalOehWa9qV1/XHOvkC5ep7VQ +R1e+9jTMMz1vPazGM90nTmO+aXtgP1M+gSIIJ4K3QWf3f0GQ1jBwNTIk8gpkx+pH +siWdHsSvsexckjZ2TCgXciTeN5jYTcJNHbYKQoiHhmax5Rr/b/xDZdZzhqa/cWDV +1SPBC4zm4hNLp/xlN0sQLZBZoaco/gTfmSMNlnjAz3qbhxEZDckk4vhzngC3sQGX +n3L++9WTkDp8UO03BYHiKYnBkvZ7asQZkYOJW5pCZU2cFHlBeoFwPPDlgPr9LQla +KqZAGCW2hF8Xt3fjILo5NYcGjMimXwBlC+qsTrG5F0eiDzmHJCGp1ySfoB5V3P4E +YhwEptnYJDawI3qHaqPvBKR9cGjyLxIASJ6FSwpMtbhP9AwLhVlbMKBIatZKlTf3 +JpEEqlFmuaPTT2NGcyECiz87NgFnqxQYbIkCHAQQAQIABgUCUg/uewAKCRD7hjJR +qG+eR+oPD/4lsJby9M7xU4YZSuHA4sR8mjMm9ijaeBkf/2WVo0AEGxqA9qC+7bSn +Ly9lZVacJhrxLOducsDjodQwm0bKOX6F+sGSpoF9WmFH+6HUjrxvNLOV5P86BNJL +ucGQTL/v9ISBCRN3YZqMs0TKKbhTsSvOkEzReYvFfoShQH55qmLYVex4y4f3pBTv 
+fHx8if7g+aZtenXlwOEHUggp7glU3Y9pDwxAvFJcBSB398KRLub1NrWbmQwpvbYZ +ahlPJu8iSVAzCB/VsoEa207/pZin+pyhDSxgWsyB1ZOON+yDQJh9gQxHWS+PpOyc +MLfscU7rA633o40JILIILR35hNvnHRiSYn9FB/DcksmPDTrhtM5ZVFbKl3lwcgs5 +MdkqacdH90ByTMcW4RbjIte35T6ASd5qiRDpGGYoUeFBnVUrk/JtMCShK49V50+j +Ine52N/6/3b7L7sJZoK8GSM/pU+FSa6TrDQM6LNASfokkAfDy6m+OO4UT8eaq2jX ++x2L4IrnufwjG78Qhg2uLxs5/n3Y2nJJptCDPonbg/0t0NqDyJ2+tsFEQU2X3Xt8 +kXyNSrwPb2JPQ7CwxWLkO3jZYzo8u7LAPRqEEKy9ezNTV7+zRCUaDc3JTkccFsB1 +kCl58hebONButUR7uSn1c8PHkqv/1Kh29Bdgvabbr6rsr0BgZphH9IkBHAQQAQIA +BgUCUpcO6AAKCRD51nCAK/by1pIjB/4qaIR55JZ7ehhO0W+Jfu4XX8vVpt+w3TF2 +vbh5w/5BgmyPkOW8elpxDHPJYQM1S2w5TghILvcHG2dDogWFKfCOSBpdpbr4ZrsA +MTZXY3C6AiWR4JJfL/MG76yEedaON2fVjJLSL5zbwzcJ2gczHXkv+TIbqo0LnlJ1 +eTn1jn4omZD8awlUNke/Z8oWYgguqzOgtIgSW3L+Z3pFfjGr2waNO5pzikwf6k3k +R9zvDloYcv9m3g7BkSk9SeP9iNARuD40QEDIkSnA3aA3DrsL1npOY1h3ak5TC0Zw +GHfEw7SrGBy9Zo/mfJAdMPIQvFPqMV8jdZ93Sqh7gtD1RY6h6a1XiQEcBBABCgAG +BQJSlw68AAoJEHR3XKwTWKOZ7wAIALZQo2H1nfMzSS9iCfHtCy98EYG7Jmw1Wiej +2fNcgkGrVGL9onIXHGfIiIG57nnaWKdWX4pzXWkV2ojkVJrTeD/mAg8+iiTnV4Wl +Csx4YumG+SjNRE5J9Q+bj1h1WyEy3nslZX5DHmRAFzaV2s6GN1dhNdBSDDSrh6J4 +iyYCLfGM+Oqt1dozozBNri93++O9AqzmsTEDEZ+nKkuiQZekraj/Qz6RSZavqXam +V4lIZRSe0R3waB1R68P2gjMHOh5TUscvlVGWPArMD1yr4elTnIhIbMYwPjP2A8Ho +GxbVo8Dsjs/uJ8ZK1Ht5NrUZKj1cGEoPBES/CT03DQaSsml8XK20JUpvaGFubmVz +IFNjaGF1ZXIgPGouc2NoYXVlckBlbWFpbC5kZT6JAjcEEwEIACEFAlHVWuECGwMF +CwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQfV2MYM9NPrQ1ZQ//QahNCnXvTKCy +AZBNqTrUufcmUUPqJi9rcCGy8efHMh9VmPok7YM3LSVNp22wbNmlv6TFU7klIKhJ +8DPgTvzjPpbRXcLlW2Xjt3ky45AymwK6p8ePaLOP07L4Cy86PlWzEWXLqNQtoMie +dvypUPKViK9VKCKTSryHRUwgvepmGTMYi332Hypwgjs+UjqGMvWrczmfAgGTqImU +pPqH6Eflz/Oalq+f5souo1FfL8yiXTww47Ba2YSmTVZAzYX6MeXqZPp8Xo/oGpxf +Bj0ygP9CGMW6Nzn23+3nCAcFbSO1PUs09kxMXsfvmZUxd3+Fo4czDV9e8w9NsRxX +NNhhkm3Tc4WQGDmZ+ITNKoDJGS+1UdWU+MfPah+fKZ87/O1R0+SX49a/vqTCzrHs +G2QG5jI/exFR014GdlUgc2u7NZ03dk3tb/zTZ5ByD4NPevy3SiEUsGTG0f5zf2se +SgqmSFRQhb5T4xFz3wrOQ48Pd6fHNO7cBuDtR8ZxenJrNC6FCLsvOvuKOXqve0bn 
+3C8Ei5rtpOWm+Hs/PfmSPO8g/u9Cp4sc2AN1ilBZovwpbxmjEt/zevrYdN05AsYo +mAPWScsirLDtloIiCN7xWbqdXWZAxVo43uWgYbRshSYpFrDyY09tN2bsb6ieWdkv +na4sst1XOpMXYk0rNLKk1CygHZpc0smJAhwEEAECAAYFAlIP7nsACgkQ+4YyUahv +nkeJKw/9G4Of0r/chxBKGwBeXZkwCjD7x/Jt0bBna0aQuBUDIUioUmFPN8fXeeqg +xY6ineq5nyPikXpPO4plWWA0KSrGMNFzAyIYfEsbcVjIpEbu9ayJxA3iHsjXB+vo +yyMylGdnsf1gomh7wzwu2CixRWfYPmBS2Z1sCEDTNBOCkKGGHUSLGEKrbDBYhX2f +0lJaFNJ7i4g2uLjbgmDsxikP0k6n6ImPBjHnSp8gRiUJJh7Vnbwf3aLS6lbAUSDz +LErCY3BwZVbR1LkmaIWqKMDaNlsm6CQhg1/FFjoEObwAkHmm6I7zyue54TdayEv5 +89T3sIxLZIN5txemuxrz3MgdiCU0mTrkGJdwxyvT3J2IUkln/lBhl3mU337cXmB6 +4OhiCq5UfwsdsalWvkNcR87aQHBq9mqpqUwdxTX6sCNo1T0WvFYsUdzX7E3DHFn9 +GZeMCW6iFw6tqfxeCG19RHsqwI4vVoS4gaG4yBI8EPi5QX4xsPMfM06Z/vGJZ+TI +xWJkG6fwg+9AS29LP0WACvyXAeVXmNEXT8jPfIoFBRBWKaGf7XJ/Bc1AVEuaxnlA +mcOZvpuMHcOP0Poo08wU4Bihm7+/aDhYcCWW4feMuDS2fRx4uj608mth6K3NsuHr +ifdoQkwVR9rKDcMSz1ehRMD+pgqhOuzhUPdfFUz+w2dSMnsCvo+IRgQQEQIABgUC +UhSBnAAKCRCGRUS2xUvXmBjoAKCllp2kNqwNfTPZ3HOoyBRraqq5mwCcDIM9R7Wk +NaRuTIxoPON+6ri2Lh+JAhwEEAECAAYFAlIUgboACgkQLRqqzyRERELDIhAAgl5W +soqpWds3aaZzYTiFtwkWRBfMrZsQI9dmKXmzzI+e/i5Z508W8Fv+IGM3zt/sQ/5j +EBDRmRc0+sBdUHb5FcvxVbQN7kzWa/5S8CNFM4e8RxeLOAJaTwwhrXt342G6msBl +iNoZlBt+8rOADQEm6GEjhjY41UCFPPp+JXf1Klvp9TlNkZou3ZXWOVSNKVlKvkM4 +w5rKbYSkOHbM2eTAHOquqP6+C0x3Y5n96bFcytvaLxpwI7IYqKT4WnX2ePvQRvIQ +nTpHQS97whMboSXbNBRfxRnpt7t8nj0KqEn/BeBgB8BYmYVObIohEGVzW5CbtBdd +fL89ONatxdr0aIZl5vPSnbOzDOb6pLzKfl71a1QUJESlWOTLOZvc3KqCMwUEngt7 +7gkO2rA+md45jXfMvk247QPahRdKC18XWU5nrMllrnhMAi3sFX5MMCMvOCmpX3Rf +CaLgm69ddFZYprpDytip4XsgalABnNuc/ukYaUwC5lwVpf6zKqe/WV1FDrRbqsIx +7tL3M6Iem68yUZSG4AGoWXoT556ClvtIJGvbfUjsIXRVsVc0N30Qf2UbyA5OLGEb +ReI6Ks7+4WpD0EDXMePCokhtSuMF2akAsmOJ5uvJXjJbxqTPpWgMVHbdBdZ9fGvh +wRUs4sOfLSqwrN96xwwt4BYxIu8O9ARBSL/sDYiJARwEEAECAAYFAlKXDugACgkQ ++dZwgCv28tZAiQf5AY/tT1QjRqNOhgZmuh1AdBq4v8+sYBhjc6wx6kFthhe1dmx0 +2DT/kCcF7TKihZ8IJtPurd8WCT6JlaGQERi6k1dk8ytjS3tHdtSaDXruPcEGdDZI ++TCSMPLcXTzhuU8+CvWRmcd2+54vaQk+bdgfkGTSfMvw7C/EGckiYdwfd8dT2GWl 
+GvNsyy9q4iWVOo1rseoNW0tLz53Y95WetT5TOgEzxZVT1c/9wphfd+WOmVT4AzSS +eaMB+JdYBhA/BQ3hz11PNEUraYEzSJsGruonvmx5LOze/s/ROIdk2dJycFUDt5Q+ +v7FS10rn2YixghWl8DhIgZZ62njV4VYUgbif9IkBHAQQAQoABgUCUpcOvAAKCRB0 +d1ysE1ijmWsnCACQ1m/hvBjyCOeAYGRrIOLWrqWxIHu8wbRMu7FIg2ITzKX9dPv8 +sADSNtgTnS+0D7q1LcbnkedglxMh64FGOHQ0RuQz+Y0ejSE2pLvx5MR5JXiuMrFC +nsNzcjcKR15IWoWdG9qMzDchTQQhdtUT6xR68bCs2sh+op6VAIAjrHiaUM7nDxoS +PVINj5bBx73Gg0XFwcUwo6LDPPju2nlgyabRcVhbtZuUgaGk7gnkk2oGeLEX+nWp +G7ychp/qkrPPyk00NDR2ZfVveTW3/9h9YW/AD8XoMCXnI2vjAfaPEUcpKRRn4twW +1dEqEwY8I/OEFQ1eZN0jFOsg11hkymZ7PhtgtCNKb2hhbm5lcyBTY2hhdWVyIDxq +b3NjaEBkZWJpYW4ub3JnPokCNwQTAQgAIQUCVNIfWgIbAwULCQgHAwUVCgkICwUW +AgMBAAIeAQIXgAAKCRB9XYxgz00+tEEnEACF4XXMwixZFIy2ZlpN7SCvt/mmNzFo +PZoJp5VCDk8CXH5wsXT8BhVsUNXMBdCr6N5+Cd12rDz3tkAvxKwJQhxMu6u87eri +rHWGD6SqFXMXKOLdLj2LeOUHijOYDVUxkoaGrMsM39RghfB++QuanhNXDQozLsWa +auVohhxF7EGzJ/y7evo66is8kdHHE3ZwgxYsS1eE5hKTy07MykoGCPkqLiDo5Fbn +7dNIFMGnCVkC5FvlwaaKJF2Bz9H+BMaUxgmvY4H+z/fyfUryq9xLYkFd83b0wAkN +4NGQ0qIjdd7hLd/tt1y48G6Hjk4VYz+863WOGyrNVHQQVvviiTgz9/eacp5nMyVW +cZ1eh4/PHpng5y9rTb2NoPy62eXro93/o7yWrCmuIO6ucVDj3nNyFayyfREJy/QK +aQryqP36u7pM7IBqsEpu70LWEOUgaQFpiUz+NWb575qeiSLydu6VYbhgcfkrKRBb +JbgqOojWTdci0tknNhQm/bEuZiUCj7l/9paHlo26isY4ZoqDwUgXJTKyd4/FIeZ4 +vfeQ/oY7XoRkNQ2yskyQWjSp8zImbfCsNOp7gHNgyPeUBPYVDU++m4u5B2c36KmV +Raxc6vXgEl8ExuvkZ3L0AM4vlG+Rj/nMRkMAjY4aA4LIHq63yP4Xsg+Noo5ZpHGO +wGpS+h3Ch5S3pbkCDQRR1Vj9ARAA2IZ4T9NMtm3vH/XjOa8z7q9GHhWlfh6i+7vh +EymfraFUTXu9O4JFOH7UiIVZvdRFP1yJCypZ0nEcTiiYlZMdJkk/9CcJiCoGMFmp +BndMuj4Hjp/lkOAXtKiNzUdw0gvaATdla5tB8U2OckXeeEuHov6aqX2Tgjw1/L1D +Q//72hv9l74BC4b5kpLgApv9A7oP6N4+idJLj0XpxsJlZDLSBAAAYcqYKBbMntfL +jrjr4oIw7Zrz7g3lNKHeDud6YxLnwzHuZfuD2ieD+/uvn957/TjFzF1yvNu8ip/7 +r6zL806TAJ/ndKm4SiuUo1+SiKjD30UGOqUYUj/H0nuVMp3HAGZSRA9VV4Um2b95 +2v6Qf+x8bdVz542CeIt2JAL8FR5MDmMco4ynqsmZ/Cw5Lc3zYSS0vbzO0J3FhY34 +RAy3BDf0TR/+NhipalF+Whu+OGRMs7V+XI0f2ttbc4R0X+0YGclWbf5urPZmMZ9Q +HAxfid4gP/aaHsEEgF7jACWNSrcBct4qUgfAFyg0c3xsgEjfvWvCAwrSyN4DGtDx 
+rA1hHPgbdZpTk4vFk4GpsPe3dLZ9uAjcteYG2f5zo6VVHIoORXKuHu6+2U3mRb3N +1MCuLWC8AN3aYM9JT4POVPI9eIxvkiXAb+eCVWjdt4LKB9QWLraA9H54Aw+yriv/ +KXL7LdcAEQEAAYkCHwQYAQgACQUCUdVY/QIbDAAKCRB9XYxgz00+tKtFEACw4W7Y +J/OYNtsyuC1fMFJLaGAP8wP4TrZ0NwTDqc7ke9LAERPVKZoQXsP0u32tBiwIRl7D +gDVvxudTPAN8z/cMxl3D+Pwcfqkm2MeN9As0yqXhp2BD+QIRJe0kAHTSWvoBoUBq +JAcDT2XhPGmu6Cgd+L6KuJlZqrflgIggNGedd5ownFFv0jR4JPt3gsqwMF3XGvEN +zVNR/4BCW5iijCZVqJy4IteQp6csw+ZT6Chd8M8+V2OlXIMqJaMMKrBedl2KcgtJ +aRGCjezQEryw/xwbgtjqGUb7nq7pGiUKnUA5d/qqw7blexiCxN1rM613WjZg/Ey1 +t4tkL7THBOl/HPW43ZM/QWQgzWA846nkydVl/VLRHf25FmfSvNvLcTGgFoeAUOKp +Q4Zg+6Iv8cUpxq0u3vbxNBXD/KVl2A5IGbb3e+ECUdL6BRyHoO4AYStAMdS3QhpZ +m5Ev9n1LzaG7fKiYPXgkvqwzup0UvKCihAoUjnEWlEOf7YihA3d2F1AojZa19ypW +DkwUow8PqggM2OqS4wgfFapGt8pAhYiCVUYqGqeVQYgFaSwQE44lVp34OFn4Vd1U +1CVH0gBsqr31C7fKUXoWlsnvZFGHg8Ibk3/pOsySGIeIBruprez+Rr0SloRZ9dHL +eSd1DTPMsgDW5tJg7mlOb8dGpXQ3FPUAVlKg57kCDQRR1VxFARAAvMCIkIxYhVv6 +vH1vxN4e9aRJ0Zf9uw1m4TAOspoRbwOAGLkGk8alI9rHMamF2is7SSLJYv5gTjmr +fHSfWBM9PgzelSEQEvqX4I8VoxhDdlzQ4oknjUpjaTk63eHG1+G1i+3CJWXqaL4y +93xarTPKBiYjbDckNKOZQeZ+aWgQrTMHKsVpR/f4g+0/RPkmmrn/7/5xWvkpg+5L +ehXc0B9ImSSEXco6i4/pGR3AvUWuFCKEQjiCM3tg7to2Hzj4REfkm9KXUpPC3pVp +Xg2NyO5rUZQi2F6FIVci2ntHpeNxO+c7C+u2wEpu7JF+FGLLzf0Zmeikpyi2DWCN +NzuxxoIiRdDAJZDGllzWgf2Yk8tN3Hbka0rbciRKug7pE9yc5WpYYN6fCxiU335c +Jl9cFrh8+OWE7Ec3mz4s0Dp3s+ykBWXPLXRKJxRseyMznYrpahvdWx+g3+0o/ZG+ +G/khINXQU2Uxesvm3t96GPTqLJosA2cS7+y9aRBERmJaIjN3jiehjVmgNgovw9hh +htnZ4fW3EIHKllaOx7izbBoHNSksaFnmT9qFOh1OqpwYran/maGKl5iP0EaTUadl +9gNeogQooMXxyeVTvAVfBqi8ie6FtPKwsqsqXINfSQKEJmNJzbj0Vj64zUaegSrK +zrVmxWEdiEqX7/BKnjav9G8ipgMHukcAEQEAAYkEPgQYAQgACQUCUdVcRQIbAgIp +CRB9XYxgz00+tMFdIAQZAQgABgUCUdVcRQAKCRDyy6XHj72D4alPD/9WgkDaUJDu +NdqO4zTN7fORZ3J7lMvnzvWGcWX9reLe/1ZaP7x8S9rbICNjRZVcf3+lZ2ipfuu0 +9eAEnoL3oyKPNYMNqj19lmQstlW7I3I1qQHTaxx65cgLnSaj86eY0NS9OH1YvXDr +PwQsx03D9V3fTfQbFDX3JJzdKu4MvHypl4POnukTr1xiZeYlr6uQf3a1yFBFCvEz +oQ2o307ZkAgTHZcAiDrUPkM6+ZPlJjetEmkYCl4a4+jWulCWDFZkv6NCTQFPkjHs 
+6Oe+/Zz3hdh8HuyBnbhKnRU/nnd8+My7hxRcpNHl+JEvVs1YEA9MZ3W3peRTN+hS +ei1qG+qLcMklemfhp0kzMnnidm2On/74Zv4YILGF8MVlC8tc8sl940ZyK4YRJU3j +GX2jUbqxv2iE1KcOEChHxNENyU43wjz3k6xBuUPAYB2oydrtLDMTqEH/0rv7PzYg +QomckLrN+PIPZa4xBR05E+fsy4fVm4dqQ+WUf7MLyWWGjMDzsLD3mXIsDy/ovNCp +e0xwukbUCqnedxVsWmWZfAYkxIFmfM65lyB6pVQnQn0EpVQoM6GN+db6xzlEw3nM +Bbsvte1MxTrdAUVyJJeIrjU8Pf3BpV+j3FX/+6uABltJcboo0BzLU2MyV9XRLwaH +WqSMgBreKDwlMgHndaEjYU9ldhezHrZ9ceXdD/9kjhuIgqyL0E/WoUTP2FXSxKdQ +i3yv4FGgFwGEb87MIGN6y2ebbPDUwUqRRwJOb1Kl+8DvtwQmtQ92VYfvb5eTE++H +PCcuRYF1ObVk5lsm9a6VQ5jjv4mxWXtg2FO+T14iywnip3QRYSZ0VUMBa3IQh7Ka +zPlt+JI9+rx2ZKT25NL+S6FT1seGHjTh2WukQ9yDMU5XDHqZbfcdwCxG1qxN2oof +fyJlsF5o+nWZhNcas/Ib8oqzSWwB1ODR+NSqajgrtBrVz5rczuCMwZ6Mo+YGdAw0 +viE9IrX56O45BFo2dt3e/1Vi4Vu0b8D0TfolvTQPEAYuFb+GZyy9h1ngPjxcsrKZ +g5rN5E+1gnhOwEab7cFnOBcecdBdGsbeIlkR4zUT5cdOc/ThINNoMIgCvTdgyYr+ +dY5DXgrtCKEN2bEhveDlCzhNeaHsHfewG18YWVrSm3SU1g/He/zkiVY4FCwM3pAu +kz8oAS55d1Kkr/8l753elvHZ/v28P+T/X9l4XJD8VIWP6clwDCZ9kc6vPGFNXiKh +nxG9rwUrcqQkaEnBbnw3QhdoTeTH5/aCnhdogHN0yX5elQwYVow36SeNx+IJ1P42 +2I7hkp8x/R0QD88klwGAv8FplZDOc/TNlRfGR38oNQE1neCN94+Bkyej++NjRix1 +kLUbeNIoVx98FilMTg== +=GO5J +-----END PGP PUBLIC KEY BLOCK----- diff --git a/debian/watch b/debian/watch index 8c28300..755e317 100644 --- a/debian/watch +++ b/debian/watch @@ -1,4 +1,4 @@ # please also check http://pypi.debian.net/img2pdf/watch version=3 -opts=uversionmangle=s/(rc|a|b|c)/~$1/ \ +opts=uversionmangle=s/(rc|a|b|c)/~$1/,pgpsigurlmangle=s/$/.asc/ \ https://pypi.debian.net/img2pdf/img2pdf-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) -- cgit v1.2.3 From 244f64ef12075bc9ad3a396e4cb1d510800140d3 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Sun, 5 Aug 2018 21:10:32 +0200 Subject: Import upstream version 0.3.1 --- CHANGES.rst | 50 +++++---- LICENSE | 165 ++++++++++++++++++++++++++++ MANIFEST.in | 1 + PKG-INFO | 227 ++++++++++++++++++++++++--------------- README.md | 223 +++++++++++++++++++++++--------------- setup.py | 2 +- 
src/img2pdf.egg-info/PKG-INFO | 227 ++++++++++++++++++++++++--------------- src/img2pdf.egg-info/SOURCES.txt | 1 + src/img2pdf.py | 141 +++++++++++++++--------- src/tests/__init__.py | 21 ++-- src/tests/input/mono.tif | Bin 262 -> 720 bytes src/tests/output/mono.tif.pdf | Bin 921 -> 915 bytes test_comp.sh | 6 +- 13 files changed, 722 insertions(+), 342 deletions(-) create mode 100644 LICENSE diff --git a/CHANGES.rst b/CHANGES.rst index d4476a8..4f5bee3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,42 +2,48 @@ CHANGES ======= -0.3.0 ------ +0.3.1 (2018-08-04) +------------------ + + - Directly copy data from CCITT Group 4 encoded TIFF images into the PDF + container without re-encoding + +0.3.0 (2018-06-18) +------------------ - Store non-jpeg images using PNG compression - Support arbitrarily large pages via PDF /UserUnit field - Disallow input with alpha channel as it cannot be preserved - Add option --pillow-limit-break to support very large input -0.2.4 ------ +0.2.4 (2017-05-23) +------------------ - Restore support for Python 2.7 - Add support for PyPy - Add support for testing using tox -0.2.3 ------ +0.2.3 (2017-01-20) +------------------ - version number bump for botched pypi upload... 
-0.2.2 ------ +0.2.2 (2017-01-20) +------------------ - automatic monochrome CCITT Group4 encoding via Pillow/libtiff -0.2.1 ------ +0.2.1 (2016-05-04) +------------------ - set img2pdf as /producer value - support multi-frame images like multipage TIFF and animated GIF - support for palette images like GIF - - support all colorspaces and imageformats knows by PIL + - support all colorspaces and imageformats known by PIL - read horizontal and vertical dpi from JPEG2000 files -0.2.0 ------ +0.2.0 (2015-05-10) +------------------ - now Python3 only - pep8 compliant code @@ -72,34 +78,34 @@ CHANGES - explicitly store date in UTC and allow parsing all date formats understood by dateutil and `date --date` -0.1.5 ------ +0.1.5 (2015-02-16) +------------------ - Enable support for CMYK images - Rework test suite - support file objects as input -0.1.4 ------ +0.1.4 (2015-01-21) +------------------ - add Python 3 support - make output reproducible by sorting and --nodate option -0.1.3 ------ +0.1.3 (2014-11-10) +------------------ - Avoid leaking file descriptors - Convert unrecognized colorspaces to RGB -0.1.1 ------ +0.1.1 (2014-09-07) +------------------ - allow running src/img2pdf.py standalone - license change from GPL to LGPL - Add pillow 2.4.0 support - add options to specify pdf dimensions in points -0.1.0 (unreleased) +0.1.0 (2014-03-14, unreleased) ------------------ - Initial PyPI release. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. 
Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/MANIFEST.in b/MANIFEST.in index 4ee2b37..9249d3f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include README.md include test_comp.sh include CHANGES.rst +include LICENSE recursive-include src *.jpg recursive-include src *.pdf recursive-include src *.png diff --git a/PKG-INFO b/PKG-INFO index e3ecf4b..975388d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,80 +1,61 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.3.0 +Version: 0.3.1 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.0 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.1 Description-Content-Type: UNKNOWN Description: img2pdf ======= - Losslessly convert raster images to PDF. The file size will not unnecessarily - increase. It can for example be used to create a PDF document from a number of - scans that are only available in JPEG format. Existing solutions would either - re-encode the input JPEG files (leading to quality loss) or store them in the - zip/flate format which results into the PDF becoming unnecessarily large in - terms of its file size. - - Background - ---------- - - Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to - PDF by embedding them into the PDF without re-encoding them. This is what - img2pdf does. It thus treats the PDF format merely as a container format for - storing one or more JPEGs or PNGs without re-encoding the images themselves. - - If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 - images into a PDF container without recompression, please contact me so that I - can put this code into the garbage bin. 
- - Functionality - ------------- - - This program will take a list of raster images and produce a PDF file with the - images embedded in it. PNG, JPEG and JPEG2000 images will be included without - recompression and the resulting PDF will only be slightly larger than the input - images due to the overhead of the PDF container. Raster images in other - formats (like gif or tif) will be included using the lossless zip/flate - encoding using the PNG Paeth predictor. - - As a result, this tool is able to losslessly wrap raster images into a PDF - container with a quality to filesize ratio that is typically better (in case of - JPEG and JPEG2000 images) or equal (in case of other formats) than that of - existing tools. - - For example, imagemagick will re-encode the input JPEG image (thus changing - its content): - - $ convert img.jpg img.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 1.6301e+06 - - If one wants to losslessly convert from any format to PDF with - imagemagick, one has to use zip compression: - - $ convert input.jpg -compress Zip output.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 0 - - However, this approach will result in PDF files that are a few times larger - than the input JPEG or JPEG2000 file. - - Furthermore, when converting PNG images, popular tools like imagemagick use - flate encoding without a predictor. This means, that image file size ends up - being several orders of magnitude larger then necessary. - - img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF - container without additional overhead (aside from the PDF structure itself), - save other graphics formats using lossless zip compression, and produce - multi-page PDF files when more than one input image is given. 
- - Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with - img2pdf is several times faster than with other tools. + Lossless conversion of raster images to PDF. You should use img2pdf if your + priorities are (in this order): + + 1. **always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. **small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + + Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. not be fast because input data gets re-encoded + + Another advantage of not having to re-encode the input (in most common + situations) is, that img2pdf is able to handle much larger input than other + software, because the raw pixel data never has to be loaded into memory. + + The following table shows how img2pdf handles different input depending on the + input file format and image color space. + + | Format | Colorspace | Result | + | -------------------- | ------------------------------ | ------------- | + | JPEG | any | direct | + | JPEG2000 | any | direct | + | PNG (non-interlaced) | any | direct | + | TIFF (CCITT Group 4) | monochrome | direct | + | any | any except CMYK and monochrome | PNG Paeth | + | any | monochrome | CCITT Group 4 | + | any | CMYK | flate | + + For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 + encoded data, img2pdf directly embeds the image data into the PDF without + re-encoding it. It thus treats the PDF format merely as a container format for + the image data. 
In these cases, img2pdf only increases the filesize by the size + of the PDF container (typically around 500 to 700 bytes). Since data is only + copied and not re-encoded, img2pdf is also typically faster than other + solutions for these input formats. + + For all other input types, img2pdf first has to transform the pixel data to + make it compatible with PDF. In most cases, the PNG Paeth filter is applied to + the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for + CMYK input no filter is applied before finally applying flate compression. Usage ----- @@ -85,44 +66,45 @@ Description: img2pdf If no output file is specified with the `-o`/`--output` option, output will be done to stdout. A typical invocation is: - img2pdf img1.png img2.jpg -o out.pdf + $ img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: - img2pdf --help - + $ img2pdf --help Bugs ---- - If you find a JPEG or JPEG2000 file that, when embedded cannot be read - by the Adobe Acrobat Reader, please contact me. - - I have not yet figured out how to determine the colorspace of JPEG2000 files. - Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with - other colorspaces, you must explicitly specify it using the `--colorspace` - option. + - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF cannot be read by the Adobe Acrobat Reader, + please contact me. - It might be possible to store transparency using masks but it is not clear - what the utility of such a functionality would be. + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. 
- Most vector graphic formats can be losslessly turned into PDF (minus some of - the features unsupported by PDF) but img2pdf will currently turn vector - graphics into their lossy raster representations. For converting raster - graphics to PDF, use another tool like inkscape and then join the resulting - pages with a tool like pdftk. + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. But img2pdf will always be lossless and thus, input images must + not carry transparency information. - A configuration file could be used for default options. + - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images. 
Installation ------------ - On a Debian- and Ubuntu-based systems, dependencies may be installed - with the following command: + On a Debian- and Ubuntu-based systems, img2pdf can be installed from the + official repositories: - apt-get install python3 python3-pil python3-setuptools + $ apt install img2pdf - You can then install the package using: + If you want to install it using pip, you can run: $ pip3 install img2pdf @@ -176,6 +158,75 @@ Description: img2pdf with open("name.pdf","wb") as f: f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + Comparison to ImageMagick + ------------------------- + + Create a large test image: + + $ convert logo: -resize 8000x original.jpg + + Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + + Notice how ImageMagick took an order of magnitude longer to do the conversion + than img2pdf. It also used twice the memory. + + Now extract the image data from both PDF documents and compare it to the + original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + + To get lossless output with ImageMagick we can use Zip compression but that + unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + + Comparison to pdfLaTeX + ---------------------- + + pdfLaTeX performs a lossless conversion from included images to PDF by default. + If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same + way as img2pdf does it. 
But for other image formats it uses flate compression + of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + + Comparison to Tesseract OCR + --------------------------- + + Tesseract OCR comes closest to the functionality img2pdf provides. It is able + to convert JPEG and PNG input to PDF without needlessly increasing the filesize + and is at the same time lossless. So if your input is JPEG and PNG images, then + you should safely be able to use Tesseract instead of img2pdf. For other input, + Tesseract might not do a lossless conversion. For example it converts CMYK + input to RGB and removes the alpha channel from images with transparency. For + multipage TIFF or animated GIF, it will only convert the first frame. + Keywords: jpeg pdf converter Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable diff --git a/README.md b/README.md index 249abb8..ef25643 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,51 @@ img2pdf ======= -Losslessly convert raster images to PDF. The file size will not unnecessarily -increase. It can for example be used to create a PDF document from a number of -scans that are only available in JPEG format. Existing solutions would either -re-encode the input JPEG files (leading to quality loss) or store them in the -zip/flate format which results into the PDF becoming unnecessarily large in -terms of its file size. - -Background ----------- - -Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to -PDF by embedding them into the PDF without re-encoding them. This is what -img2pdf does. 
It thus treats the PDF format merely as a container format for -storing one or more JPEGs or PNGs without re-encoding the images themselves. - -If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 -images into a PDF container without recompression, please contact me so that I -can put this code into the garbage bin. - -Functionality -------------- - -This program will take a list of raster images and produce a PDF file with the -images embedded in it. PNG, JPEG and JPEG2000 images will be included without -recompression and the resulting PDF will only be slightly larger than the input -images due to the overhead of the PDF container. Raster images in other -formats (like gif or tif) will be included using the lossless zip/flate -encoding using the PNG Paeth predictor. - -As a result, this tool is able to losslessly wrap raster images into a PDF -container with a quality to filesize ratio that is typically better (in case of -JPEG and JPEG2000 images) or equal (in case of other formats) than that of -existing tools. - -For example, imagemagick will re-encode the input JPEG image (thus changing -its content): - - $ convert img.jpg img.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 1.6301e+06 - -If one wants to losslessly convert from any format to PDF with -imagemagick, one has to use zip compression: - - $ convert input.jpg -compress Zip output.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 0 - -However, this approach will result in PDF files that are a few times larger -than the input JPEG or JPEG2000 file. - -Furthermore, when converting PNG images, popular tools like imagemagick use -flate encoding without a predictor. This means, that image file size ends up -being several orders of magnitude larger then necessary. 
- -img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF -container without additional overhead (aside from the PDF structure itself), -save other graphics formats using lossless zip compression, and produce -multi-page PDF files when more than one input image is given. - -Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with -img2pdf is several times faster than with other tools. +Lossless conversion of raster images to PDF. You should use img2pdf if your +priorities are (in this order): + + 1. **always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. **small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + +Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. not be fast because input data gets re-encoded + +Another advantage of not having to re-encode the input (in most common +situations) is, that img2pdf is able to handle much larger input than other +software, because the raw pixel data never has to be loaded into memory. + +The following table shows how img2pdf handles different input depending on the +input file format and image color space. 
+ +| Format | Colorspace | Result | +| -------------------- | ------------------------------ | ------------- | +| JPEG | any | direct | +| JPEG2000 | any | direct | +| PNG (non-interlaced) | any | direct | +| TIFF (CCITT Group 4) | monochrome | direct | +| any | any except CMYK and monochrome | PNG Paeth | +| any | monochrome | CCITT Group 4 | +| any | CMYK | flate | + +For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 +encoded data, img2pdf directly embeds the image data into the PDF without +re-encoding it. It thus treats the PDF format merely as a container format for +the image data. In these cases, img2pdf only increases the filesize by the size +of the PDF container (typically around 500 to 700 bytes). Since data is only +copied and not re-encoded, img2pdf is also typically faster than other +solutions for these input formats. + +For all other input types, img2pdf first has to transform the pixel data to +make it compatible with PDF. In most cases, the PNG Paeth filter is applied to +the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for +CMYK input no filter is applied before finally applying flate compression. Usage ----- @@ -75,44 +56,45 @@ descriptor. If no output file is specified with the `-o`/`--output` option, output will be done to stdout. A typical invocation is: - img2pdf img1.png img2.jpg -o out.pdf + $ img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: - img2pdf --help - + $ img2pdf --help Bugs ---- -If you find a JPEG or JPEG2000 file that, when embedded cannot be read -by the Adobe Acrobat Reader, please contact me. - -I have not yet figured out how to determine the colorspace of JPEG2000 files. -Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with -other colorspaces, you must explicitly specify it using the `--colorspace` -option. 
+ - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF, cannot be read by the Adobe Acrobat Reader, + please contact me. -It might be possible to store transparency using masks but it is not clear -what the utility of such a functionality would be. + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. -Most vector graphic formats can be losslessly turned into PDF (minus some of -the features unsupported by PDF) but img2pdf will currently turn vector -graphics into their lossy raster representations. For converting raster -graphics to PDF, use another tool like inkscape and then join the resulting -pages with a tool like pdftk. + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. But img2pdf will always be lossless and thus, input images must + not carry transparency information. -A configuration file could be used for default options. + - img2pdf uses PIL (or Pillow) to obtain image metadata and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images.
Installation ------------ -On a Debian- and Ubuntu-based systems, dependencies may be installed -with the following command: +On a Debian- and Ubuntu-based systems, img2pdf can be installed from the +official repositories: - apt-get install python3 python3-pil python3-setuptools + $ apt install img2pdf -You can then install the package using: +If you want to install it using pip, you can run: $ pip3 install img2pdf @@ -165,3 +147,72 @@ The package can also be used as a library: layout_fun = img2pdf.get_layout_fun(a4inpt) with open("name.pdf","wb") as f: f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + +Comparison to ImageMagick +------------------------- + +Create a large test image: + + $ convert logo: -resize 8000x original.jpg + +Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + +Notice how ImageMagick took an order of magnitude longer to do the conversion +than img2pdf. It also used twice the memory. + +Now extract the image data from both PDF documents and compare it to the +original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + +To get lossless output with ImageMagick we can use Zip compression but that +unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + +Comparison to pdfLaTeX +---------------------- + +pdfLaTeX performs a lossless conversion from included images to PDF by default. +If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same +way as img2pdf does it. 
But for other image formats it uses flate compression +of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + +Comparison to Tesseract OCR +--------------------------- + +Tesseract OCR comes closest to the functionality img2pdf provides. It is able +to convert JPEG and PNG input to PDF without needlessly increasing the filesize +and is at the same time lossless. So if your input is JPEG and PNG images, then +you should safely be able to use Tesseract instead of img2pdf. For other input, +Tesseract might not do a lossless conversion. For example it converts CMYK +input to RGB and removes the alpha channel from images with transparency. For +multipage TIFF or animated GIF, it will only convert the first frame. diff --git a/setup.py b/setup.py index 56e9c4c..cc56301 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup PY3 = sys.version_info[0] >= 3 -VERSION = "0.3.0" +VERSION = "0.3.1" INSTALL_REQUIRES = ( 'Pillow', diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index e3ecf4b..975388d 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,80 +1,61 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.3.0 +Version: 0.3.1 Summary: Convert images to PDF via direct JPEG inclusion. 
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.0 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.1 Description-Content-Type: UNKNOWN Description: img2pdf ======= - Losslessly convert raster images to PDF. The file size will not unnecessarily - increase. It can for example be used to create a PDF document from a number of - scans that are only available in JPEG format. Existing solutions would either - re-encode the input JPEG files (leading to quality loss) or store them in the - zip/flate format which results into the PDF becoming unnecessarily large in - terms of its file size. - - Background - ---------- - - Quality loss can be avoided when converting PNG, JPEG and JPEG2000 images to - PDF by embedding them into the PDF without re-encoding them. This is what - img2pdf does. It thus treats the PDF format merely as a container format for - storing one or more JPEGs or PNGs without re-encoding the images themselves. - - If you know an existing tool which allows one to embed PNG, JPEG and JPEG2000 - images into a PDF container without recompression, please contact me so that I - can put this code into the garbage bin. - - Functionality - ------------- - - This program will take a list of raster images and produce a PDF file with the - images embedded in it. PNG, JPEG and JPEG2000 images will be included without - recompression and the resulting PDF will only be slightly larger than the input - images due to the overhead of the PDF container. Raster images in other - formats (like gif or tif) will be included using the lossless zip/flate - encoding using the PNG Paeth predictor. 
- - As a result, this tool is able to losslessly wrap raster images into a PDF - container with a quality to filesize ratio that is typically better (in case of - JPEG and JPEG2000 images) or equal (in case of other formats) than that of - existing tools. - - For example, imagemagick will re-encode the input JPEG image (thus changing - its content): - - $ convert img.jpg img.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 1.6301e+06 - - If one wants to losslessly convert from any format to PDF with - imagemagick, one has to use zip compression: - - $ convert input.jpg -compress Zip output.pdf - $ pdfimages img.pdf img.extr # not using -j to be extra sure there is no recompression - $ compare -metric AE img.jpg img.extr-000.ppm null: - 0 - - However, this approach will result in PDF files that are a few times larger - than the input JPEG or JPEG2000 file. - - Furthermore, when converting PNG images, popular tools like imagemagick use - flate encoding without a predictor. This means, that image file size ends up - being several orders of magnitude larger then necessary. - - img2pdf is able to losslessly embed PNG, JPEG and JPEG2000 files into a PDF - container without additional overhead (aside from the PDF structure itself), - save other graphics formats using lossless zip compression, and produce - multi-page PDF files when more than one input image is given. - - Also, since PNG, JPEG and JPEG2000 images are not reencoded, conversion with - img2pdf is several times faster than with other tools. + Lossless conversion of raster images to PDF. You should use img2pdf if your + priorities are (in this order): + + 1. **always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. 
**small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + + Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. not be fast because input data gets re-encoded + + Another advantage of not having to re-encode the input (in most common + situations) is, that img2pdf is able to handle much larger input than other + software, because the raw pixel data never has to be loaded into memory. + + The following table shows how img2pdf handles different input depending on the + input file format and image color space. + + | Format | Colorspace | Result | + | -------------------- | ------------------------------ | ------------- | + | JPEG | any | direct | + | JPEG2000 | any | direct | + | PNG (non-interlaced) | any | direct | + | TIFF (CCITT Group 4) | monochrome | direct | + | any | any except CMYK and monochrome | PNG Paeth | + | any | monochrome | CCITT Group 4 | + | any | CMYK | flate | + + For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 + encoded data, img2pdf directly embeds the image data into the PDF without + re-encoding it. It thus treats the PDF format merely as a container format for + the image data. In these cases, img2pdf only increases the filesize by the size + of the PDF container (typically around 500 to 700 bytes). Since data is only + copied and not re-encoded, img2pdf is also typically faster than other + solutions for these input formats. + + For all other input types, img2pdf first has to transform the pixel data to + make it compatible with PDF. In most cases, the PNG Paeth filter is applied to + the pixel data. 
For monochrome input, CCITT Group 4 is used instead. Only for + CMYK input no filter is applied before finally applying flate compression. Usage ----- @@ -85,44 +66,45 @@ Description: img2pdf If no output file is specified with the `-o`/`--output` option, output will be done to stdout. A typical invocation is: - img2pdf img1.png img2.jpg -o out.pdf + $ img2pdf img1.png img2.jpg -o out.pdf The detailed documentation can be accessed by running: - img2pdf --help - + $ img2pdf --help Bugs ---- - If you find a JPEG or JPEG2000 file that, when embedded cannot be read - by the Adobe Acrobat Reader, please contact me. - - I have not yet figured out how to determine the colorspace of JPEG2000 files. - Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 files with - other colorspaces, you must explicitly specify it using the `--colorspace` - option. + - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF cannot be read by the Adobe Acrobat Reader, + please contact me. - It might be possible to store transparency using masks but it is not clear - what the utility of such a functionality would be. + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. - Most vector graphic formats can be losslessly turned into PDF (minus some of - the features unsupported by PDF) but img2pdf will currently turn vector - graphics into their lossy raster representations. For converting raster - graphics to PDF, use another tool like inkscape and then join the resulting - pages with a tool like pdftk. + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. 
But img2pdf will always be lossless and thus, input images must + not carry transparency information. - A configuration file could be used for default options. + - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images. Installation ------------ - On a Debian- and Ubuntu-based systems, dependencies may be installed - with the following command: + On a Debian- and Ubuntu-based systems, img2pdf can be installed from the + official repositories: - apt-get install python3 python3-pil python3-setuptools + $ apt install img2pdf - You can then install the package using: + If you want to install it using pip, you can run: $ pip3 install img2pdf @@ -176,6 +158,75 @@ Description: img2pdf with open("name.pdf","wb") as f: f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + Comparison to ImageMagick + ------------------------- + + Create a large test image: + + $ convert logo: -resize 8000x original.jpg + + Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + + Notice how ImageMagick took an order of magnitude longer to do the conversion + than img2pdf. It also used twice the memory. 
+ + Now extract the image data from both PDF documents and compare it to the + original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + + To get lossless output with ImageMagick we can use Zip compression but that + unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + + Comparison to pdfLaTeX + ---------------------- + + pdfLaTeX performs a lossless conversion from included images to PDF by default. + If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same + way as img2pdf does it. But for other image formats it uses flate compression + of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + + Comparison to Tesseract OCR + --------------------------- + + Tesseract OCR comes closest to the functionality img2pdf provides. It is able + to convert JPEG and PNG input to PDF without needlessly increasing the filesize + and is at the same time lossless. So if your input is JPEG and PNG images, then + you should safely be able to use Tesseract instead of img2pdf. For other input, + Tesseract might not do a lossless conversion. For example it converts CMYK + input to RGB and removes the alpha channel from images with transparency. 
For + multipage TIFF or animated GIF, it will only convert the first frame. + Keywords: jpeg pdf converter Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt index ae6e816..3271401 100644 --- a/src/img2pdf.egg-info/SOURCES.txt +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -1,4 +1,5 @@ CHANGES.rst +LICENSE MANIFEST.in README.md setup.cfg diff --git a/src/img2pdf.py b/src/img2pdf.py index 48ef964..7c1978e 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -22,7 +22,7 @@ import sys import os import zlib import argparse -from PIL import Image +from PIL import Image, TiffImagePlugin from datetime import datetime from jp2 import parsejp2 from enum import Enum @@ -32,7 +32,7 @@ import struct PY3 = sys.version_info[0] >= 3 -__version__ = "0.3.0" +__version__ = "0.3.1" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -62,7 +62,7 @@ PageOrientation = Enum('PageOrientation', 'portrait landscape') Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other') -ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG other') +ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG TIFF other') PageMode = Enum('PageMode', 'none outlines thumbs') @@ -277,7 +277,8 @@ if PY3: @classmethod def encode(cls, string, hextype=False): if hextype: - return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >' + return b'< ' + b' '.join( + ("%06x" % c).encode('ascii') for c in string) + b' >' else: try: string = string.encode('ascii') @@ -292,7 +293,8 @@ else: @classmethod def encode(cls, string, hextype=False): if hextype: - return b'< ' + b' '.join(("%06x"%c).encode('ascii') for c in string) + b' >' + return b'< ' + b' '.join( + ("%06x" % c).encode('ascii') for c in string) + b' >' else: # This mimics exactely to what pdfrw does. 
string = string.replace(b'\\', b'\\\\') @@ -374,7 +376,7 @@ class pdfdoc(object): def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, - pageheight, userunit=None, palette=None): + pageheight, userunit=None, palette=None, inverted=False): if self.with_pdfrw: from pdfrw import PdfDict, PdfName, PdfObject, PdfString from pdfrw.py23_diffs import convert_load @@ -393,8 +395,11 @@ class pdfdoc(object): colorspace = PdfName.DeviceCMYK elif color == Colorspace.P: if self.with_pdfrw: - raise Exception("pdfrw does not support hex strings for palette image input, re-run with --without-pdfrw") - colorspace = [ PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, PdfString.encode(palette, hextype=True)] + raise Exception("pdfrw does not support hex strings for " + "palette image input, re-run with " + "--without-pdfrw") + colorspace = [PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, + PdfString.encode(palette, hextype=True)] else: raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) @@ -440,15 +445,20 @@ class pdfdoc(object): if imgformat is ImageFormat.CCITTGroup4: decodeparms = PdfDict() + # The default for the K parameter is 0 which indicates Group 3 1-D + # encoding. We set it to -1 because we want Group 4 encoding. 
decodeparms[PdfName.K] = -1 - decodeparms[PdfName.BlackIs1] = PdfObject('true') + if inverted: + decodeparms[PdfName.BlackIs1] = PdfObject('false') + else: + decodeparms[PdfName.BlackIs1] = PdfObject('true') decodeparms[PdfName.Columns] = imgwidthpx decodeparms[PdfName.Rows] = imgheightpx image[PdfName.DecodeParms] = [decodeparms] elif imgformat is ImageFormat.PNG: decodeparms = PdfDict() decodeparms[PdfName.Predictor] = 15 - if color in [ Colorspace.P, Colorspace['1'], Colorspace.L ]: + if color in [Colorspace.P, Colorspace['1'], Colorspace.L]: decodeparms[PdfName.Colors] = 1 else: decodeparms[PdfName.Colors] = 3 @@ -642,13 +652,14 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): ics = imgdata.mode if ics in ["LA", "PA", "RGBA"]: - logging.warning("Image contains transparency which cannot be retained in PDF.") + logging.warning("Image contains transparency which cannot be retained " + "in PDF.") logging.warning("img2pdf will not perform a lossy operation.") logging.warning("You can remove the alpha channel using imagemagick:") - logging.warning(" $ convert input.png -background white -alpha remove -alpha off output.png") + logging.warning(" $ convert input.png -background white -alpha " + "remove -alpha off output.png") raise Exception("Refusing to work on images with alpha channel") - # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0 # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG # header. In that case it can happen that the horizontal and vertical DPI @@ -685,11 +696,33 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): return (color, ndpi, imgwidthpx, imgheightpx) +def ccitt_payload_location_from_pil(img): + # If Pillow is passed an invalid compression argument it will ignore it; + # make sure the image actually got compressed. 
+ if img.info['compression'] != 'group4': + raise ValueError("Image not compressed with CCITT Group 4 but with: %s" + % img.info['compression']) + + # Read the TIFF tags to find the offset(s) of the compressed data strips. + strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS] + strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] + rows_per_strip = img.tag_v2[TiffImagePlugin.ROWSPERSTRIP] + + # PIL always seems to create a single strip even for very large TIFFs when + # it saves images, so assume we only have to read a single strip. + # A test ~10 GPixel image was still encoded as a single strip. Just to be + # safe check throw an error if there is more than one offset. + if len(strip_offsets) != 1 or len(strip_bytes) != 1: + raise NotImplementedError("Transcoding multiple strips not supported") + + (offset, ), (length, ) = strip_offsets, strip_bytes + + return offset, length + + def transcode_monochrome(imgdata): """Convert the open PIL.Image imgdata to compressed CCITT Group4 data""" - from PIL import TiffImagePlugin - logging.debug("Converting monochrome to CCITT Group4") # Convert the image to Group 4 in memory. If libtiff is not installed and @@ -707,27 +740,11 @@ def transcode_monochrome(imgdata): newimgio.seek(0) newimg = Image.open(newimgio) - # If Pillow is passed an invalid compression argument it will ignore it; - # make sure the image actually got compressed. - if newimg.info['compression'] != 'group4': - raise ValueError("Image not compressed as expected") - - # Read the TIFF tags to find the offset(s) of the compressed data strips. - strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS] - strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] - rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP] - - # PIL always seems to create a single strip even for very large TIFFs when - # it saves images, so assume we only have to read a single strip. - # A test ~10 GPixel image was still encoded as a single strip. 
Just to be - # safe check throw an error if there is more than one offset. - if len(strip_offsets) > 1: - raise NotImplementedError("Transcoding multiple strips not supported") + offset, length = ccitt_payload_location_from_pil(newimg) - newimgio.seek(strip_offsets[0]) - ccittdata = newimgio.read(strip_bytes[0]) + newimgio.seek(offset) + return newimgio.read(length) - return ccittdata def parse_png(rawdata): pngidat = b"" @@ -737,18 +754,20 @@ def parse_png(rawdata): # once we can require Python >= 3.2 we can use int.from_bytes() instead n, = struct.unpack('>I', rawdata[i-8:i-4]) if i + n > len(rawdata): - raise Exception("invalid png: %d %d %d"%(i, n, len(rawdata))) + raise Exception("invalid png: %d %d %d" % (i, n, len(rawdata))) if rawdata[i-4:i] == b"IDAT": pngidat += rawdata[i:i+n] elif rawdata[i-4:i] == b"PLTE": for j in range(i, i+n, 3): - # with int.from_bytes() we would not have to prepend extra zeroes + # with int.from_bytes() we would not have to prepend extra + # zeroes color, = struct.unpack('>I', b'\x00'+rawdata[j:j+3]) palette.append(color) i += n i += 12 return pngidat, palette + def read_images(rawdata, colorspace, first_frame_only=False): im = BytesIO(rawdata) im.seek(0) @@ -786,7 +805,8 @@ def read_images(rawdata, colorspace, first_frame_only=False): if color == Colorspace['RGBA']: raise JpegColorspaceError("jpeg can't have an alpha channel") im.close() - return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [])] + return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [], + False)] # We can directly embed the IDAT chunk of PNG images if the PNG is not # interlaced @@ -799,7 +819,30 @@ def read_images(rawdata, colorspace, first_frame_only=False): color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace, rawdata) pngidat, palette = parse_png(rawdata) - return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, palette)] + im.close() + return [(color, ndpi, imgformat, 
pngidat, imgwidthpx, imgheightpx, + palette, False)] + + # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it + # only contains a single strip + if imgformat == ImageFormat.TIFF \ + and imgdata.info['compression'] == "group4" \ + and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1: + photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] + inverted = False + if photo == 0: + inverted = True + elif photo != 1: + raise ValueError("unsupported photometric interpretation for " + "group4 tiff: %d" % photo) + color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata) + offset, length = ccitt_payload_location_from_pil(imgdata) + im.seek(offset) + rawdata = im.read(length) + im.close() + return [(color, ndpi, ImageFormat.CCITTGroup4, rawdata, imgwidthpx, + imgheightpx, [], inverted)] # Everything else has to be encoded @@ -826,7 +869,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): ccittdata = transcode_monochrome(imgdata) imgformat = ImageFormat.CCITTGroup4 result.append((color, ndpi, imgformat, ccittdata, - imgwidthpx, imgheightpx, [])) + imgwidthpx, imgheightpx, [], False)) img_page_count += 1 continue except Exception as e: @@ -839,13 +882,14 @@ def read_images(rawdata, colorspace, first_frame_only=False): logging.debug("Colorspace is OK: %s", color) newimg = imgdata else: - raise ValueError("unknown or unsupported colorspace: %s" % color.name) + raise ValueError("unknown or unsupported colorspace: %s" + % color.name) # the PNG format does not support CMYK, so we fall back to normal # compression if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: imggz = zlib.compress(newimg.tobytes()) result.append((color, ndpi, imgformat, imggz, imgwidthpx, - imgheightpx, [])) + imgheightpx, [], False)) else: # cheapo version to retrieve a PNG encoding of the payload is to # just save it with PIL. 
In the future this could be replaced by @@ -855,7 +899,7 @@ def read_images(rawdata, colorspace, first_frame_only=False): pngidat, palette = parse_png(pngbuffer.getvalue()) imgformat = ImageFormat.PNG result.append((color, ndpi, imgformat, pngidat, imgwidthpx, - imgheightpx, palette)) + imgheightpx, palette, False)) img_page_count += 1 # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method @@ -1164,14 +1208,14 @@ def convert(*images, **kwargs): try: with open(img, "rb") as f: rawdata = f.read() - except: + except Exception: # whatever the exception is (string could contain NUL # characters or the path could just not exist) it's not a file # name so we now try treating it as raw image content rawdata = img - for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, palette \ - in read_images( + for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, \ + palette, inverted in read_images( rawdata, kwargs['colorspace'], kwargs['first_frame_only']): pagewidth, pageheight, imgwidthpdf, imgheightpdf = \ kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi) @@ -1195,7 +1239,8 @@ def convert(*images, **kwargs): imgypdf = (pageheight - imgheightpdf)/2.0 pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, - imgypdf, pagewidth, pageheight, userunit, palette) + imgypdf, pagewidth, pageheight, userunit, + palette, inverted) if kwargs['outputstream']: pdf.tostream(kwargs['outputstream']) @@ -1542,7 +1587,7 @@ Fit options: Argument parsing: Argument long options can be abbreviated to a prefix if the abbreviation is - anambiguous. That is, the prefix must match a unique option. + unambiguous. That is, the prefix must match a unique option. Beware of your shell interpreting argument values as special characters (like the semicolon in the CMYK;I colorspace option). If in doubt, put the argument @@ -1667,7 +1712,7 @@ RGB.''') "to prevent decompression bomb denial of service attacks. 
If " "your input image contains more pixels than that, use this " "option to disable this safety measure during this run of img2pdf" - %Image.MAX_IMAGE_PIXELS) + % Image.MAX_IMAGE_PIXELS) sizeargs = parser.add_argument_group( title='Image and page size and layout arguments', diff --git a/src/tests/__init__.py b/src/tests/__init__.py index b1c1797..c9b85e3 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -592,10 +592,17 @@ def test_suite(): if imgprops.DecodeParms: if orig_img.format == 'PNG': pngidat, palette = img2pdf.parse_png(orig_imgdata) + elif orig_img.format == 'TIFF' \ + and orig_img.info['compression'] == "group4": + offset, length = \ + img2pdf.ccitt_payload_location_from_pil( + orig_img) + pngidat = orig_imgdata[offset:offset+length] else: pngbuffer = BytesIO() orig_img.save(pngbuffer, format="png") - pngidat, palette = img2pdf.parse_png(pngbuffer.getvalue()) + pngidat, palette = img2pdf.parse_png( + pngbuffer.getvalue()) self.assertEqual(zlib.decompress(pngidat), imgdata) else: colorspace = imgprops.ColorSpace @@ -607,17 +614,19 @@ def test_suite(): colorspace = 'CMYK' else: raise Exception("invalid colorspace") - im = Image.frombytes(colorspace, (int(imgprops.Width), - int(imgprops.Height)), + im = Image.frombytes(colorspace, + (int(imgprops.Width), + int(imgprops.Height)), imgdata) if orig_img.mode == '1': self.assertEqual(im.tobytes(), orig_img.convert("L").tobytes()) - elif orig_img.mode not in ("RGB", "L", "CMYK", "CMYK;I"): + elif orig_img.mode not in ("RGB", "L", "CMYK", + "CMYK;I"): self.assertEqual(im.tobytes(), orig_img.convert("RGB").tobytes()) - # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not - # have the close() method + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does + # not have the close() method try: im.close() except AttributeError: diff --git a/src/tests/input/mono.tif b/src/tests/input/mono.tif index 53e85bc..3718d52 100644 Binary files a/src/tests/input/mono.tif and b/src/tests/input/mono.tif 
differ diff --git a/src/tests/output/mono.tif.pdf b/src/tests/output/mono.tif.pdf index d23e65e..eda3ec7 100644 Binary files a/src/tests/output/mono.tif.pdf and b/src/tests/output/mono.tif.pdf differ diff --git a/test_comp.sh b/test_comp.sh index ae832e2..44edefd 100755 --- a/test_comp.sh +++ b/test_comp.sh @@ -16,17 +16,17 @@ for a in `convert -list compress`; do echo "encode:\t$a" convert "$1" -compress $a "`basename $1 .jpg`.pdf" pdfimages "`basename $1 .jpg`.pdf" "`basename $1 .jpg`" - /bin/echo -ne "diff:\t" + printf "diff:\t" diff=`compare -metric AE "$1" "\`basename $1 .jpg\`-000.ppm" null: 2>&1` if [ "$diff" != "0" ]; then echo "lossy" else echo "lossless" fi - /bin/echo -ne "size:\t" + printf "size:\t" pdfsize=`stat -c "%s" "\`basename $1 .jpg\`.pdf"` echo "scale=1;$pdfsize/$imsize" | bc - /bin/echo -ne "pdf:\t" + printf "pdf:\t" grep --max-count=1 --text /Filter "`basename $1 .jpg`.pdf" echo done -- cgit v1.2.3 From fe31a108d82bed7802006ee64eb7c08bff0595bb Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Sun, 5 Aug 2018 21:11:28 +0200 Subject: debian/changelog: add new changelog entry --- debian/changelog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/debian/changelog b/debian/changelog index 537f2b9..60d51ed 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +img2pdf (0.3.1-1) UNRELEASED; urgency=medium + + * new upstream release + * debian/watch: check signature + + -- Johannes 'josch' Schauer Sun, 05 Aug 2018 21:10:55 +0200 + img2pdf (0.3.0-1) unstable; urgency=medium * new upstream release -- cgit v1.2.3 From 6c13db1a7912acb2d599457265f02597e4c92b98 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Sun, 5 Aug 2018 21:12:24 +0200 Subject: debian/control: add breaks on ocrmypdf (<< 6.2.3-1) Only later ocrmypdf versions are able to cope with the restriction of img2pdf not to be able to convert images with alpha channel. 
--- debian/control | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/control b/debian/control index 9bfbb78..27c4c6d 100644 --- a/debian/control +++ b/debian/control @@ -29,6 +29,7 @@ Package: python3-img2pdf Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Suggests: python3-pdfrw +Breaks: ocrmypdf (<< 6.2.3-1) Description: Lossless conversion of raster images to PDF (library) This module will take a list of raster images and produce a PDF file with the images embedded in it. JPEG and JPEG2000 images will be included without -- cgit v1.2.3 From ea5274e8fcdc535cb8ee7408734712fc023375d5 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Sun, 5 Aug 2018 21:38:44 +0200 Subject: Update changelog for 0.3.1-1 release --- debian/changelog | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 60d51ed..2189d32 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,10 @@ -img2pdf (0.3.1-1) UNRELEASED; urgency=medium +img2pdf (0.3.1-1) unstable; urgency=medium * new upstream release * debian/watch: check signature + * debian/control: add breaks on ocrmypdf (<< 6.2.3-1) - -- Johannes 'josch' Schauer Sun, 05 Aug 2018 21:10:55 +0200 + -- Johannes 'josch' Schauer Sun, 05 Aug 2018 21:38:39 +0200 img2pdf (0.3.0-1) unstable; urgency=medium -- cgit v1.2.3 From 322bcdadf439963ee372828ea094604fabfdc9c8 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 23 Nov 2018 18:18:23 +0100 Subject: import upstream version 0.3.2 --- CHANGES.rst | 11 ++ PKG-INFO | 8 +- setup.py | 2 +- src/img2pdf.egg-info/PKG-INFO | 8 +- src/img2pdf.py | 228 ++++++++++++++++++++++++++++++------------ 5 files changed, 184 insertions(+), 73 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4f5bee3..cbe43ce 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,17 @@ CHANGES ======= +0.3.2 (2018-11-20) +------------------ + + - support big endian TIFF with lsb-to-msb FillOrder + - support multipage 
CCITT Group 4 TIFF + - also reject palette images with transparency + - support PNG images with 1, 2, 4 or 16 bits per sample + - support multipage TIFF with differently encoded images + - support CCITT Group4 TIFF without rows-per-strip + - add extensive test suite + 0.3.1 (2018-08-04) ------------------ diff --git a/PKG-INFO b/PKG-INFO index 975388d..7925752 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,13 +1,12 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: img2pdf -Version: 0.3.1 +Version: 0.3.2 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.1 -Description-Content-Type: UNKNOWN +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.2 Description: img2pdf ======= @@ -243,3 +242,4 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English Classifier: Operating System :: OS Independent +Provides-Extra: test diff --git a/setup.py b/setup.py index cc56301..8160035 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup PY3 = sys.version_info[0] >= 3 -VERSION = "0.3.1" +VERSION = "0.3.2" INSTALL_REQUIRES = ( 'Pillow', diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index 975388d..7925752 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,13 +1,12 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: img2pdf -Version: 0.3.1 +Version: 0.3.2 Summary: Convert images to PDF via direct JPEG inclusion. 
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.1 -Description-Content-Type: UNKNOWN +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.2 Description: img2pdf ======= @@ -243,3 +242,4 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English Classifier: Operating System :: OS Independent +Provides-Extra: test diff --git a/src/img2pdf.py b/src/img2pdf.py index 7c1978e..e9402b3 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -23,6 +23,7 @@ import os import zlib import argparse from PIL import Image, TiffImagePlugin +#TiffImagePlugin.DEBUG = True from datetime import datetime from jp2 import parsejp2 from enum import Enum @@ -32,7 +33,7 @@ import struct PY3 = sys.version_info[0] >= 3 -__version__ = "0.3.1" +__version__ = "0.3.2" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -77,6 +78,39 @@ Unit = Enum('Unit', 'pt cm mm inch') ImgUnit = Enum('ImgUnit', 'pt cm mm inch perc dpi') +TIFFBitRevTable = [ + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, + 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, + 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, + 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, + 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, + 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, + 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, + 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 
0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, + 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, + 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, + 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, + 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, + 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, + 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, + 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, + 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, + 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF] class NegativeDimensionError(Exception): pass @@ -284,6 +318,10 @@ if PY3: string = string.encode('ascii') except UnicodeEncodeError: string = b"\xfe\xff"+string.encode("utf-16-be") + # We should probably encode more here because at least + # ghostscript interpretes a carriage return byte (0x0D) as a + # new line byte (0x0A) + # PDF supports: \n, \r, \t, \b and \f string = string.replace(b'\\', b'\\\\') string = string.replace(b'(', b'\\(') string = string.replace(b')', b'\\)') @@ -376,7 +414,8 @@ class pdfdoc(object): def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, - pageheight, userunit=None, palette=None, inverted=False): + pageheight, userunit=None, palette=None, inverted=False, + depth=0): if self.with_pdfrw: from pdfrw import PdfDict, PdfName, PdfObject, PdfString from pdfrw.py23_diffs import convert_load @@ -423,21 +462,7 @@ class pdfdoc(object): image[PdfName.Width] = imgwidthpx image[PdfName.Height] = imgheightpx image[PdfName.ColorSpace] = colorspace - # hardcoded as PIL doesn't provide bits 
for non-jpeg formats - if imgformat is ImageFormat.CCITTGroup4: - image[PdfName.BitsPerComponent] = 1 - else: - if color == Colorspace['1']: - image[PdfName.BitsPerComponent] = 1 - elif color == Colorspace.P: - if len(palette) <= 2**1: - image[PdfName.BitsPerComponent] = 1 - elif len(palette) <= 2**4: - image[PdfName.BitsPerComponent] = 4 - else: - image[PdfName.BitsPerComponent] = 8 - else: - image[PdfName.BitsPerComponent] = 8 + image[PdfName.BitsPerComponent] = depth if color == Colorspace['CMYK;I']: # Inverts all four channels @@ -463,17 +488,7 @@ class pdfdoc(object): else: decodeparms[PdfName.Colors] = 3 decodeparms[PdfName.Columns] = imgwidthpx - if color == Colorspace['1']: - decodeparms[PdfName.BitsPerComponent] = 1 - elif color == Colorspace.P: - if len(palette) <= 2**1: - decodeparms[PdfName.BitsPerComponent] = 1 - elif len(palette) <= 2**4: - decodeparms[PdfName.BitsPerComponent] = 4 - else: - decodeparms[PdfName.BitsPerComponent] = 8 - else: - decodeparms[PdfName.BitsPerComponent] = 8 + decodeparms[PdfName.BitsPerComponent] = depth image[PdfName.DecodeParms] = decodeparms text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % @@ -651,7 +666,7 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) ics = imgdata.mode - if ics in ["LA", "PA", "RGBA"]: + if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info: logging.warning("Image contains transparency which cannot be retained " "in PDF.") logging.warning("img2pdf will not perform a lossy operation.") @@ -667,6 +682,12 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): if ndpi == (0, 0): ndpi = (default_dpi, default_dpi) + # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi. + # In that case, we want to use a different default. 
+ if ndpi == (1, 1) and imgformat == ImageFormat.TIFF: + ndpi = (imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi), + imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi)) + logging.debug("input dpi = %d x %d", *ndpi) if colorspace: @@ -678,7 +699,16 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): if c.name == ics: color = c if color is None: - color = Colorspace.other + # PIL does not provide the information about the original + # colorspace for 16bit grayscale PNG images. Thus, we retrieve + # that info manually by looking at byte 10 in the IHDR chunk. We + # know where to find that in the file because the IHDR chunk must + # be the first chunk + if rawdata is not None and imgformat == ImageFormat.PNG \ + and rawdata[25] == 0: + color = Colorspace.L + else: + raise ValueError("unknown colorspace") if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG: # Adobe inverts CMYK JPEGs for some reason, and others # have followed suit as well. Some software assumes the @@ -706,7 +736,7 @@ def ccitt_payload_location_from_pil(img): # Read the TIFF tags to find the offset(s) of the compressed data strips. strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS] strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] - rows_per_strip = img.tag_v2[TiffImagePlugin.ROWSPERSTRIP] + rows_per_strip = img.tag_v2.get(TiffImagePlugin.ROWSPERSTRIP, 2**32 - 1) # PIL always seems to create a single strip even for very large TIFFs when # it saves images, so assume we only have to read a single strip. 
@@ -717,6 +747,9 @@ def ccitt_payload_location_from_pil(img): (offset, ), (length, ) = strip_offsets, strip_bytes + logging.debug("TIFF strip_offsets: %d" % offset) + logging.debug("TIFF strip_bytes: %d" % length) + return offset, length @@ -758,6 +791,15 @@ def parse_png(rawdata): if rawdata[i-4:i] == b"IDAT": pngidat += rawdata[i:i+n] elif rawdata[i-4:i] == b"PLTE": + # This could be as simple as saying "palette = rawdata[i:i+n]" but + # pdfrw does only escape parenthesis and backslashes in the raw + # byte stream. But raw carriage return bytes are interpreted as + # line feed bytes by ghostscript. So instead we use the hex string + # format. pdfrw cannot write it but at least ghostscript is happy + # with it. We would also write out the palette in binary format + # (and escape more bytes) but since we cannot use pdfrw anyways, + # we choose the more human readable variant. + # See https://github.com/pmaupin/pdfrw/issues/147 for j in range(i, i+n, 3): # with int.from_bytes() we would not have to prepend extra # zeroes @@ -805,8 +847,9 @@ def read_images(rawdata, colorspace, first_frame_only=False): if color == Colorspace['RGBA']: raise JpegColorspaceError("jpeg can't have an alpha channel") im.close() + logging.debug("read_images() embeds a JPEG") return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [], - False)] + False, 8)] # We can directly embed the IDAT chunk of PNG images if the PNG is not # interlaced @@ -820,31 +863,28 @@ def read_images(rawdata, colorspace, first_frame_only=False): imgdata, imgformat, default_dpi, colorspace, rawdata) pngidat, palette = parse_png(rawdata) im.close() + # PIL does not provide the information about the original bits per + # sample. Thus, we retrieve that info manually by looking at byte 9 in + # the IHDR chunk. 
We know where to find that in the file because the + # IHDR chunk must be the first chunk + depth = rawdata[24] + if depth not in [1, 2, 4, 8, 16]: + raise ValueError("invalid bit depth: %d" % depth) + logging.debug("read_images() embeds a PNG") return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, - palette, False)] - - # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it - # only contains a single strip - if imgformat == ImageFormat.TIFF \ - and imgdata.info['compression'] == "group4" \ - and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1: - photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] - inverted = False - if photo == 0: - inverted = True - elif photo != 1: - raise ValueError("unsupported photometric interpretation for " - "group4 tiff: %d" % photo) - color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata) - offset, length = ccitt_payload_location_from_pil(imgdata) - im.seek(offset) - rawdata = im.read(length) - im.close() - return [(color, ndpi, ImageFormat.CCITTGroup4, rawdata, imgwidthpx, - imgheightpx, [], inverted)] + palette, False, depth)] - # Everything else has to be encoded + # If our input is not JPEG or PNG, then we might have a format that + # supports multiple frames (like TIFF or GIF), so we need a loop to + # iterate through all frames of the image. 
+ # + # Each frame gets compressed using PNG compression *except* if: + # + # * The image is monochrome => encode using CCITT group 4 + # + # * The image is CMYK => zip plain RGB data + # + # * We are handling a CCITT encoded TIFF frame => embed data result = [] img_page_count = 0 @@ -858,6 +898,56 @@ def read_images(rawdata, colorspace, first_frame_only=False): if first_frame_only and img_page_count > 0: break + # PIL is unable to preserve the data of 16-bit RGB TIFF files and will + # convert it to 8-bit without the possibility to retrieve the original + # data + # https://github.com/python-pillow/Pillow/issues/1888 + # + # Some tiff images do not have BITSPERSAMPLE set. Use this to create + # such a tiff: tiffset -u 258 test.tif + if imgformat == ImageFormat.TIFF \ + and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8: + raise ValueError("PIL is unable to preserve more than 8 bits per sample") + + # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it + # only contains a single strip + if imgformat == ImageFormat.TIFF \ + and imgdata.info['compression'] == "group4" \ + and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1: + photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] + inverted = False + if photo == 0: + inverted = True + elif photo != 1: + raise ValueError("unsupported photometric interpretation for " + "group4 tiff: %d" % photo) + color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata) + offset, length = ccitt_payload_location_from_pil(imgdata) + im.seek(offset) + rawdata = im.read(length) + fillorder = imgdata.tag_v2.get(TiffImagePlugin.FILLORDER) + if fillorder is None: + # no FillOrder: nothing to do + pass + elif fillorder == 1: + # msb-to-lsb: nothing to do + pass + elif fillorder == 2: + logging.debug("fillorder is lsb-to-msb => reverse bits") + # lsb-to-msb: reverse bits of each byte + rawdata = bytearray(rawdata) + for i in 
range(len(rawdata)): + rawdata[i] = TIFFBitRevTable[rawdata[i]] + rawdata = bytes(rawdata) + else: + raise ValueError("unsupported FillOrder: %d" % fillorder) + logging.debug("read_images() embeds Group4 from TIFF") + result.append((color, ndpi, ImageFormat.CCITTGroup4, rawdata, + imgwidthpx, imgheightpx, [], inverted, 1)) + img_page_count += 1 + continue + logging.debug("Converting frame: %d" % img_page_count) color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( @@ -867,9 +957,10 @@ def read_images(rawdata, colorspace, first_frame_only=False): if color == Colorspace['1']: try: ccittdata = transcode_monochrome(imgdata) - imgformat = ImageFormat.CCITTGroup4 - result.append((color, ndpi, imgformat, ccittdata, - imgwidthpx, imgheightpx, [], False)) + logging.debug( + "read_images() encoded a B/W image as CCITT group 4") + result.append((color, ndpi, ImageFormat.CCITTGroup4, ccittdata, + imgwidthpx, imgheightpx, [], False, 1)) img_page_count += 1 continue except Exception as e: @@ -888,8 +979,9 @@ def read_images(rawdata, colorspace, first_frame_only=False): # compression if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: imggz = zlib.compress(newimg.tobytes()) + logging.debug("read_images() encoded CMYK with flate compression") result.append((color, ndpi, imgformat, imggz, imgwidthpx, - imgheightpx, [], False)) + imgheightpx, [], False, 8)) else: # cheapo version to retrieve a PNG encoding of the payload is to # just save it with PIL. In the future this could be replaced by @@ -897,9 +989,17 @@ def read_images(rawdata, colorspace, first_frame_only=False): pngbuffer = BytesIO() newimg.save(pngbuffer, format="png") pngidat, palette = parse_png(pngbuffer.getvalue()) - imgformat = ImageFormat.PNG - result.append((color, ndpi, imgformat, pngidat, imgwidthpx, - imgheightpx, palette, False)) + # PIL does not provide the information about the original bits per + # sample. Thus, we retrieve that info manually by looking at byte 9 in + # the IHDR chunk. 
We know where to find that in the file because the + # IHDR chunk must be the first chunk + pngbuffer.seek(24) + depth = ord(pngbuffer.read(1)) + if depth not in [1, 2, 4, 8, 16]: + raise ValueError("invalid bit depth: %d" % depth) + logging.debug("read_images() encoded an image as PNG") + result.append((color, ndpi, ImageFormat.PNG, pngidat, imgwidthpx, + imgheightpx, palette, False, depth)) img_page_count += 1 # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method @@ -1215,7 +1315,7 @@ def convert(*images, **kwargs): rawdata = img for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, \ - palette, inverted in read_images( + palette, inverted, depth in read_images( rawdata, kwargs['colorspace'], kwargs['first_frame_only']): pagewidth, pageheight, imgwidthpdf, imgheightpdf = \ kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi) @@ -1240,7 +1340,7 @@ def convert(*images, **kwargs): pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat, imgdata, imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, pageheight, userunit, - palette, inverted) + palette, inverted, depth) if kwargs['outputstream']: pdf.tostream(kwargs['outputstream']) -- cgit v1.2.3 From 994e8b1c4aeb9cafbc71a142071773a9e08ec070 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Fri, 23 Nov 2018 18:23:24 +0100 Subject: upload version 0.3.2-1 to unstable --- debian/changelog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/debian/changelog b/debian/changelog index 2189d32..de18c17 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +img2pdf (0.3.2-1) unstable; urgency=medium + + * new upstream release + - fixed PNG handling (closes: #913153) + + -- Johannes 'josch' Schauer Fri, 23 Nov 2018 18:23:26 +0100 + img2pdf (0.3.1-1) unstable; urgency=medium * new upstream release -- cgit v1.2.3 From 4c9789be351000f6261b7e022d9d8dc12d8f2ed3 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 10:56:49 +0100 
Subject: Import upstream version 0.3.3 --- CHANGES.rst | 8 + MANIFEST.in | 1 + PKG-INFO | 4 +- setup.py | 2 +- src/img2pdf.egg-info/PKG-INFO | 4 +- src/img2pdf.egg-info/SOURCES.txt | 1 + src/img2pdf.py | 1648 ++++++++++++++++++++++++++------------ src/tests/__init__.py | 77 +- test.sh | 1468 +++++++++++++++++++++++++++++++++ 9 files changed, 2708 insertions(+), 505 deletions(-) create mode 100755 test.sh diff --git a/CHANGES.rst b/CHANGES.rst index cbe43ce..a9ab56b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,14 @@ CHANGES ======= +0.3.3 (2019-01-07) +------------------ + + - restore basic support for Python 2 + - also ship test.sh + - add legal and tabloid paper formats + - respect exif rotation tag + 0.3.2 (2018-11-20) ------------------ diff --git a/MANIFEST.in b/MANIFEST.in index 9249d3f..d86af25 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,6 @@ include README.md include test_comp.sh +include test.sh include CHANGES.rst include LICENSE recursive-include src *.jpg diff --git a/PKG-INFO b/PKG-INFO index 7925752..7553591 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 2.1 Name: img2pdf -Version: 0.3.2 +Version: 0.3.3 Summary: Convert images to PDF via direct JPEG inclusion. 
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.2 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.3 Description: img2pdf ======= diff --git a/setup.py b/setup.py index 8160035..b0438fe 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup PY3 = sys.version_info[0] >= 3 -VERSION = "0.3.2" +VERSION = "0.3.3" INSTALL_REQUIRES = ( 'Pillow', diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index 7925752..7553591 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 2.1 Name: img2pdf -Version: 0.3.2 +Version: 0.3.3 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.2 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.3 Description: img2pdf ======= diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt index 3271401..6fa068a 100644 --- a/src/img2pdf.egg-info/SOURCES.txt +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -4,6 +4,7 @@ MANIFEST.in README.md setup.cfg setup.py +test.sh test_comp.sh src/img2pdf.py src/jp2.py diff --git a/src/img2pdf.py b/src/img2pdf.py index e9402b3..27e5b8c 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -23,7 +23,9 @@ import os import zlib import argparse from PIL import Image, TiffImagePlugin -#TiffImagePlugin.DEBUG = True + +# TiffImagePlugin.DEBUG = True +from PIL.ExifTags import TAGS from datetime import datetime from jp2 import parsejp2 from enum import Enum @@ -33,84 +35,313 @@ import 
struct PY3 = sys.version_info[0] >= 3 -__version__ = "0.3.2" +__version__ = "0.3.3" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", - "a0": "841mmx1189mm", - "a1": "594mmx841mm", - "a2": "420mmx594mm", - "a3": "297mmx420mm", - "a4": "210mmx297mm", - "a5": "148mmx210mm", - "a6": "105mmx148mm", + "a0": "841mmx1189mm", + "a1": "594mmx841mm", + "a2": "420mmx594mm", + "a3": "297mmx420mm", + "a4": "210mmx297mm", + "a5": "148mmx210mm", + "a6": "105mmx148mm", + "legal": "8.5inx14in", + "tabloid": "11inx17in", } papernames = { "letter": "Letter", - "a0": "A0", - "a1": "A1", - "a2": "A2", - "a3": "A3", - "a4": "A4", - "a5": "A5", - "a6": "A6", + "a0": "A0", + "a1": "A1", + "a2": "A2", + "a3": "A3", + "a4": "A4", + "a5": "A5", + "a6": "A6", + "legal": "Legal", + "tabloid": "Tabloid", } -FitMode = Enum('FitMode', 'into fill exact shrink enlarge') +FitMode = Enum("FitMode", "into fill exact shrink enlarge") -PageOrientation = Enum('PageOrientation', 'portrait landscape') +PageOrientation = Enum("PageOrientation", "portrait landscape") -Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other') +Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other") -ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 PNG TIFF other') +ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF other") -PageMode = Enum('PageMode', 'none outlines thumbs') +PageMode = Enum("PageMode", "none outlines thumbs") -PageLayout = Enum('PageLayout', - 'single onecolumn twocolumnright twocolumnleft') +PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft") -Magnification = Enum('Magnification', 'fit fith fitbh') +Magnification = Enum("Magnification", "fit fith fitbh") -ImgSize = Enum('ImgSize', 'abs perc dpi') +ImgSize = Enum("ImgSize", "abs perc dpi") -Unit = Enum('Unit', 'pt cm mm inch') +Unit = Enum("Unit", "pt cm mm inch") -ImgUnit = Enum('ImgUnit', 'pt cm mm inch perc dpi') +ImgUnit = Enum("ImgUnit", "pt cm mm inch perc dpi") 
TIFFBitRevTable = [ - 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, - 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, - 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, - 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, - 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, - 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, - 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, - 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, - 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, - 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, - 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, - 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, - 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, - 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, - 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, - 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, - 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, - 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, - 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, - 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, - 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, - 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, - 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, - 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, - 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, - 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, - 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, - 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, - 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, - 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, - 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, - 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF] + 0x00, + 0x80, + 0x40, + 0xC0, + 0x20, + 0xA0, + 0x60, + 0xE0, + 0x10, + 0x90, + 0x50, + 0xD0, + 0x30, + 0xB0, + 0x70, + 0xF0, + 0x08, + 0x88, + 0x48, + 0xC8, + 0x28, + 0xA8, + 0x68, + 0xE8, + 0x18, + 0x98, + 0x58, + 0xD8, + 0x38, + 0xB8, + 0x78, + 0xF8, + 0x04, + 0x84, + 0x44, + 0xC4, + 0x24, + 0xA4, + 0x64, + 0xE4, + 0x14, + 0x94, + 0x54, + 0xD4, + 0x34, + 0xB4, + 0x74, + 
0xF4, + 0x0C, + 0x8C, + 0x4C, + 0xCC, + 0x2C, + 0xAC, + 0x6C, + 0xEC, + 0x1C, + 0x9C, + 0x5C, + 0xDC, + 0x3C, + 0xBC, + 0x7C, + 0xFC, + 0x02, + 0x82, + 0x42, + 0xC2, + 0x22, + 0xA2, + 0x62, + 0xE2, + 0x12, + 0x92, + 0x52, + 0xD2, + 0x32, + 0xB2, + 0x72, + 0xF2, + 0x0A, + 0x8A, + 0x4A, + 0xCA, + 0x2A, + 0xAA, + 0x6A, + 0xEA, + 0x1A, + 0x9A, + 0x5A, + 0xDA, + 0x3A, + 0xBA, + 0x7A, + 0xFA, + 0x06, + 0x86, + 0x46, + 0xC6, + 0x26, + 0xA6, + 0x66, + 0xE6, + 0x16, + 0x96, + 0x56, + 0xD6, + 0x36, + 0xB6, + 0x76, + 0xF6, + 0x0E, + 0x8E, + 0x4E, + 0xCE, + 0x2E, + 0xAE, + 0x6E, + 0xEE, + 0x1E, + 0x9E, + 0x5E, + 0xDE, + 0x3E, + 0xBE, + 0x7E, + 0xFE, + 0x01, + 0x81, + 0x41, + 0xC1, + 0x21, + 0xA1, + 0x61, + 0xE1, + 0x11, + 0x91, + 0x51, + 0xD1, + 0x31, + 0xB1, + 0x71, + 0xF1, + 0x09, + 0x89, + 0x49, + 0xC9, + 0x29, + 0xA9, + 0x69, + 0xE9, + 0x19, + 0x99, + 0x59, + 0xD9, + 0x39, + 0xB9, + 0x79, + 0xF9, + 0x05, + 0x85, + 0x45, + 0xC5, + 0x25, + 0xA5, + 0x65, + 0xE5, + 0x15, + 0x95, + 0x55, + 0xD5, + 0x35, + 0xB5, + 0x75, + 0xF5, + 0x0D, + 0x8D, + 0x4D, + 0xCD, + 0x2D, + 0xAD, + 0x6D, + 0xED, + 0x1D, + 0x9D, + 0x5D, + 0xDD, + 0x3D, + 0xBD, + 0x7D, + 0xFD, + 0x03, + 0x83, + 0x43, + 0xC3, + 0x23, + 0xA3, + 0x63, + 0xE3, + 0x13, + 0x93, + 0x53, + 0xD3, + 0x33, + 0xB3, + 0x73, + 0xF3, + 0x0B, + 0x8B, + 0x4B, + 0xCB, + 0x2B, + 0xAB, + 0x6B, + 0xEB, + 0x1B, + 0x9B, + 0x5B, + 0xDB, + 0x3B, + 0xBB, + 0x7B, + 0xFB, + 0x07, + 0x87, + 0x47, + 0xC7, + 0x27, + 0xA7, + 0x67, + 0xE7, + 0x17, + 0x97, + 0x57, + 0xD7, + 0x37, + 0xB7, + 0x77, + 0xF7, + 0x0F, + 0x8F, + 0x4F, + 0xCF, + 0x2F, + 0xAF, + 0x6F, + 0xEF, + 0x1F, + 0x9F, + 0x5F, + 0xDF, + 0x3F, + 0xBF, + 0x7F, + 0xFF, +] + class NegativeDimensionError(Exception): pass @@ -139,9 +370,18 @@ def my_convert_load(string): def parse(cont, indent=1): if type(cont) is dict: - return b"<<\n"+b"\n".join( - [4 * indent * b" " + k + b" " + parse(v, indent+1) - for k, v in sorted(cont.items())])+b"\n"+4*(indent-1)*b" "+b">>" + return ( + b"<<\n" + + 
b"\n".join( + [ + 4 * indent * b" " + k + b" " + parse(v, indent + 1) + for k, v in sorted(cont.items()) + ] + ) + + b"\n" + + 4 * (indent - 1) * b" " + + b">>" + ) elif type(cont) is int: return str(cont).encode() elif type(cont) is float: @@ -159,13 +399,13 @@ def parse(cont, indent=1): elif type(cont) is str or isinstance(cont, bytes): if type(cont) is str and type(cont) is not bytes: raise TypeError( - "parse must be passed a bytes object in py3. Got: %s" % cont) + "parse must be passed a bytes object in py3. Got: %s" % cont + ) return cont elif isinstance(cont, list): - return b"[ "+b" ".join([parse(c, indent) for c in cont])+b" ]" + return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]" else: - raise TypeError("cannot handle type %s with content %s" % (type(cont), - cont)) + raise TypeError("cannot handle type %s with content %s" % (type(cont), cont)) class MyPdfDict(object): @@ -188,12 +428,18 @@ class MyPdfDict(object): def tostring(self): if self.stream is not None: return ( - ("%d 0 obj\n" % self.identifier).encode() + - parse(self.content) + - b"\nstream\n" + self.stream + b"\nendstream\nendobj\n") + ("%d 0 obj\n" % self.identifier).encode() + + parse(self.content) + + b"\nstream\n" + + self.stream + + b"\nendstream\nendobj\n" + ) else: - return ("%d 0 obj\n" % self.identifier).encode() + \ - parse(self.content) + b"\nendobj\n" + return ( + ("%d 0 obj\n" % self.identifier).encode() + + parse(self.content) + + b"\nendobj\n" + ) def __setitem__(self, key, value): self.content[key] = value @@ -202,9 +448,9 @@ class MyPdfDict(object): return self.content[key] -class MyPdfName(): +class MyPdfName: def __getattr__(self, name): - return b'/' + name.encode('ascii') + return b"/" + name.encode("ascii") MyPdfName = MyPdfName() @@ -212,14 +458,14 @@ MyPdfName = MyPdfName() class MyPdfObject(bytes): def __new__(cls, string): - return bytes.__new__(cls, string.encode('ascii')) + return bytes.__new__(cls, string.encode("ascii")) class MyPdfArray(list): 
pass -class MyPdfWriter(): +class MyPdfWriter: def __init__(self, version="1.3"): self.objects = [] # create an incomplete pages object so that a /Parent entry can be @@ -230,7 +476,7 @@ class MyPdfWriter(): self.pagearray = [] def addobj(self, obj): - newid = len(self.objects)+1 + newid = len(self.objects) + 1 obj.identifier = newid self.objects.append(obj) @@ -251,8 +497,8 @@ class MyPdfWriter(): # # the choice of binary characters is arbitrary but those four seem to # be used elsewhere. - pdfheader = ('%%PDF-%s\n' % self.version).encode('ascii') - pdfheader += b'%\xe2\xe3\xcf\xd3\n' + pdfheader = ("%%PDF-%s\n" % self.version).encode("ascii") + pdfheader += b"%\xe2\xe3\xcf\xd3\n" stream.write(pdfheader) # From section 3.4.3 of the PDF Reference (version 1.7): @@ -291,8 +537,10 @@ class MyPdfWriter(): for x in xreftable: stream.write(x) stream.write(b"trailer\n") - stream.write(parse({b"/Size": len(xreftable), b"/Info": info, - b"/Root": self.catalog})+b"\n") + stream.write( + parse({b"/Size": len(xreftable), b"/Info": info, b"/Root": self.catalog}) + + b"\n" + ) stream.write(b"startxref\n") stream.write(("%d\n" % xrefoffset).encode()) stream.write(b"%%EOF\n") @@ -307,49 +555,76 @@ class MyPdfWriter(): if PY3: - class MyPdfString(): + + class MyPdfString: @classmethod def encode(cls, string, hextype=False): if hextype: - return b'< ' + b' '.join( - ("%06x" % c).encode('ascii') for c in string) + b' >' + return ( + b"< " + + b" ".join(("%06x" % c).encode("ascii") for c in string) + + b" >" + ) else: try: - string = string.encode('ascii') + string = string.encode("ascii") except UnicodeEncodeError: - string = b"\xfe\xff"+string.encode("utf-16-be") + string = b"\xfe\xff" + string.encode("utf-16-be") # We should probably encode more here because at least # ghostscript interpretes a carriage return byte (0x0D) as a # new line byte (0x0A) # PDF supports: \n, \r, \t, \b and \f - string = string.replace(b'\\', b'\\\\') - string = string.replace(b'(', b'\\(') - string = 
string.replace(b')', b'\\)') - return b'(' + string + b')' + string = string.replace(b"\\", b"\\\\") + string = string.replace(b"(", b"\\(") + string = string.replace(b")", b"\\)") + return b"(" + string + b")" + + else: + class MyPdfString(object): @classmethod def encode(cls, string, hextype=False): if hextype: - return b'< ' + b' '.join( - ("%06x" % c).encode('ascii') for c in string) + b' >' + return ( + b"< " + + b" ".join(("%06x" % c).encode("ascii") for c in string) + + b" >" + ) else: # This mimics exactely to what pdfrw does. - string = string.replace(b'\\', b'\\\\') - string = string.replace(b'(', b'\\(') - string = string.replace(b')', b'\\)') - return b'(' + string + b')' + string = string.replace(b"\\", b"\\\\") + string = string.replace(b"(", b"\\(") + string = string.replace(b")", b"\\)") + return b"(" + string + b")" class pdfdoc(object): - def __init__(self, version="1.3", title=None, author=None, creator=None, - producer=None, creationdate=None, moddate=None, subject=None, - keywords=None, nodate=False, panes=None, initial_page=None, - magnification=None, page_layout=None, fit_window=False, - center_window=False, fullscreen=False, with_pdfrw=True): + def __init__( + self, + version="1.3", + title=None, + author=None, + creator=None, + producer=None, + creationdate=None, + moddate=None, + subject=None, + keywords=None, + nodate=False, + panes=None, + initial_page=None, + magnification=None, + page_layout=None, + fit_window=False, + center_window=False, + fullscreen=False, + with_pdfrw=True, + ): if with_pdfrw: try: from pdfrw import PdfWriter, PdfDict, PdfName, PdfString + self.with_pdfrw = True except ImportError: PdfWriter = MyPdfWriter @@ -379,17 +654,21 @@ class pdfdoc(object): if producer is not None and producer != "": self.info[PdfName.Producer] = PdfString.encode(producer) if creationdate is not None: - self.info[PdfName.CreationDate] = \ - PdfString.encode("D:"+datetime_to_pdfdate(creationdate)) + self.info[PdfName.CreationDate] = 
PdfString.encode( + "D:" + datetime_to_pdfdate(creationdate) + ) elif not nodate: - self.info[PdfName.CreationDate] = \ - PdfString.encode("D:"+datetime_to_pdfdate(now)) + self.info[PdfName.CreationDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(now) + ) if moddate is not None: - self.info[PdfName.ModDate] = \ - PdfString.encode("D:"+datetime_to_pdfdate(moddate)) + self.info[PdfName.ModDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(moddate) + ) elif not nodate: self.info[PdfName.ModDate] = PdfString.encode( - "D:"+datetime_to_pdfdate(now)) + "D:" + datetime_to_pdfdate(now) + ) if subject is not None: self.info[PdfName.Subject] = PdfString.encode(subject) if keywords is not None: @@ -412,10 +691,25 @@ class pdfdoc(object): self.center_window = center_window self.fullscreen = fullscreen - def add_imagepage(self, color, imgwidthpx, imgheightpx, imgformat, imgdata, - imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, - pageheight, userunit=None, palette=None, inverted=False, - depth=0): + def add_imagepage( + self, + color, + imgwidthpx, + imgheightpx, + imgformat, + imgdata, + imgwidthpdf, + imgheightpdf, + imgxpdf, + imgypdf, + pagewidth, + pageheight, + userunit=None, + palette=None, + inverted=False, + depth=0, + rotate=0, + ): if self.with_pdfrw: from pdfrw import PdfDict, PdfName, PdfObject, PdfString from pdfrw.py23_diffs import convert_load @@ -426,22 +720,27 @@ class pdfdoc(object): PdfString = MyPdfString convert_load = my_convert_load - if color == Colorspace['1'] or color == Colorspace.L: + if color == Colorspace["1"] or color == Colorspace.L: colorspace = PdfName.DeviceGray elif color == Colorspace.RGB: colorspace = PdfName.DeviceRGB - elif color == Colorspace.CMYK or color == Colorspace['CMYK;I']: + elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: colorspace = PdfName.DeviceCMYK elif color == Colorspace.P: if self.with_pdfrw: - raise Exception("pdfrw does not support hex strings for " - "palette image input, re-run with 
" - "--without-pdfrw") - colorspace = [PdfName.Indexed, PdfName.DeviceRGB, len(palette)-1, - PdfString.encode(palette, hextype=True)] + raise Exception( + "pdfrw does not support hex strings for " + "palette image input, re-run with " + "--without-pdfrw" + ) + colorspace = [ + PdfName.Indexed, + PdfName.DeviceRGB, + len(palette) - 1, + PdfString.encode(palette, hextype=True), + ] else: - raise UnsupportedColorspaceError("unsupported color space: %s" - % color.name) + raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) # either embed the whole jpeg or deflate the bitmap representation if imgformat is ImageFormat.JPEG: @@ -464,7 +763,7 @@ class pdfdoc(object): image[PdfName.ColorSpace] = colorspace image[PdfName.BitsPerComponent] = depth - if color == Colorspace['CMYK;I']: + if color == Colorspace["CMYK;I"]: # Inverts all four channels image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0] @@ -474,16 +773,16 @@ class pdfdoc(object): # encoding. We set it to -1 because we want Group 4 encoding. 
decodeparms[PdfName.K] = -1 if inverted: - decodeparms[PdfName.BlackIs1] = PdfObject('false') + decodeparms[PdfName.BlackIs1] = PdfObject("false") else: - decodeparms[PdfName.BlackIs1] = PdfObject('true') + decodeparms[PdfName.BlackIs1] = PdfObject("true") decodeparms[PdfName.Columns] = imgwidthpx decodeparms[PdfName.Rows] = imgheightpx image[PdfName.DecodeParms] = [decodeparms] elif imgformat is ImageFormat.PNG: decodeparms = PdfDict() decodeparms[PdfName.Predictor] = 15 - if color in [Colorspace.P, Colorspace['1'], Colorspace.L]: + if color in [Colorspace.P, Colorspace["1"], Colorspace.L]: decodeparms[PdfName.Colors] = 1 else: decodeparms[PdfName.Colors] = 3 @@ -491,8 +790,10 @@ class pdfdoc(object): decodeparms[PdfName.BitsPerComponent] = depth image[PdfName.DecodeParms] = decodeparms - text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % - (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii") + text = ( + "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" + % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf) + ).encode("ascii") content = PdfDict(stream=convert_load(text)) resources = PdfDict(XObject=PdfDict(Im0=image)) @@ -502,10 +803,12 @@ class pdfdoc(object): page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight] page[PdfName.Resources] = resources page[PdfName.Contents] = content + if rotate != 0: + page[PdfName.Rotate] = rotate if userunit is not None: # /UserUnit requires PDF 1.6 - if self.writer.version < '1.6': - self.writer.version = '1.6' + if self.writer.version < "1.6": + self.writer.version = "1.6" page[PdfName.UserUnit] = userunit self.writer.addpage(page) @@ -527,8 +830,8 @@ class pdfdoc(object): PdfName = MyPdfName PdfObject = MyPdfObject PdfArray = MyPdfArray - NullObject = PdfObject('null') - TrueObject = PdfObject('true') + NullObject = PdfObject("null") + TrueObject = PdfObject("true") # We fill the catalog with more information like /ViewerPreferences, # /PageMode, /PageLayout or /OpenAction because the latter refers to a @@ -543,23 
+846,30 @@ class pdfdoc(object): else: catalog = self.writer.catalog - if self.fullscreen or self.fit_window or self.center_window or \ - self.panes is not None: + if ( + self.fullscreen + or self.fit_window + or self.center_window + or self.panes is not None + ): catalog[PdfName.ViewerPreferences] = PdfDict() if self.fullscreen: # this setting might be overwritten later by the page mode - catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ - = PdfName.UseNone + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseNone if self.panes == PageMode.thumbs: - catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ - = PdfName.UseThumbs + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseThumbs # this setting might be overwritten later if fullscreen catalog[PdfName.PageMode] = PdfName.UseThumbs elif self.panes == PageMode.outlines: - catalog[PdfName.ViewerPreferences][PdfName.NonFullScreenPageMode] \ - = PdfName.UseOutlines + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseOutlines # this setting might be overwritten later if fullscreen catalog[PdfName.PageMode] = PdfName.UseOutlines elif self.panes in [PageMode.none, None]: @@ -571,8 +881,7 @@ class pdfdoc(object): catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject if self.center_window: - catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = \ - TrueObject + catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = TrueObject if self.fullscreen: catalog[PdfName.PageMode] = PdfName.FullScreen @@ -598,24 +907,27 @@ class pdfdoc(object): # account if self.initial_page is not None: initial_page = self.writer.pagearray[self.initial_page - 1] - catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.XYZ, - NullObject, NullObject, 0]) + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.XYZ, NullObject, NullObject, 0] + ) if self.magnification == 
Magnification.fit: catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit]) elif self.magnification == Magnification.fith: pagewidth = initial_page[PdfName.MediaBox][2] catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.FitH, pagewidth]) + [initial_page, PdfName.FitH, pagewidth] + ) elif self.magnification == Magnification.fitbh: # quick hack to determine the image width on the page imgwidth = float(initial_page[PdfName.Contents].stream.split()[4]) catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.FitBH, imgwidth]) + [initial_page, PdfName.FitBH, imgwidth] + ) elif isinstance(self.magnification, float): catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.XYZ, NullObject, NullObject, - self.magnification]) + [initial_page, PdfName.XYZ, NullObject, NullObject, self.magnification] + ) elif self.magnification is None: pass else: @@ -643,8 +955,7 @@ class pdfdoc(object): def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): - if imgformat == ImageFormat.JPEG2000 \ - and rawdata is not None and imgdata is None: + if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: # this codepath gets called if the PIL installation is not able to # handle JPEG2000 files imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata) @@ -667,12 +978,15 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): ics = imgdata.mode if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info: - logging.warning("Image contains transparency which cannot be retained " - "in PDF.") + logging.warning( + "Image contains transparency which cannot be retained " "in PDF." 
+ ) logging.warning("img2pdf will not perform a lossy operation.") logging.warning("You can remove the alpha channel using imagemagick:") - logging.warning(" $ convert input.png -background white -alpha " - "remove -alpha off output.png") + logging.warning( + " $ convert input.png -background white -alpha " + "remove -alpha off output.png" + ) raise Exception("Refusing to work on images with alpha channel") # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0 @@ -685,11 +999,39 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi. # In that case, we want to use a different default. if ndpi == (1, 1) and imgformat == ImageFormat.TIFF: - ndpi = (imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi), - imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi)) + ndpi = ( + imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi), + imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi), + ) logging.debug("input dpi = %d x %d", *ndpi) + rotation = 0 + if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None: + for tag, value in imgdata._getexif().items(): + if TAGS.get(tag, tag) == "Orientation": + # Detailed information on EXIF rotation tags: + # http://impulseadventure.com/photo/exif-orientation.html + if value == 1: + rotation = 0 + elif value == 6: + rotation = 90 + elif value == 3: + rotation = 180 + elif value == 8: + rotation = 270 + elif value in (2, 4, 5, 7): + raise Exception( + 'Image "%s": Unsupported flipped ' + "rotation mode (%d)" % (im.name, value) + ) + else: + raise Exception( + 'Image "%s": invalid rotation (%d)' % (im.name, value) + ) + + logging.debug("rotation = %d°", rotation) + if colorspace: color = colorspace logging.debug("input colorspace (forced) = %s", color) @@ -704,8 +1046,11 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): # that info manually by 
looking at byte 10 in the IHDR chunk. We # know where to find that in the file because the IHDR chunk must # be the first chunk - if rawdata is not None and imgformat == ImageFormat.PNG \ - and rawdata[25] == 0: + if ( + rawdata is not None + and imgformat == ImageFormat.PNG + and rawdata[25] == 0 + ): color = Colorspace.L else: raise ValueError("unknown colorspace") @@ -718,25 +1063,27 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): # better for images currently in the wild, so I'm going # with the first approach for now. if "adobe" in imgdata.info: - color = Colorspace['CMYK;I'] + color = Colorspace["CMYK;I"] logging.debug("input colorspace = %s", color.name) logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) - return (color, ndpi, imgwidthpx, imgheightpx) + return (color, ndpi, imgwidthpx, imgheightpx, rotation) def ccitt_payload_location_from_pil(img): # If Pillow is passed an invalid compression argument it will ignore it; # make sure the image actually got compressed. - if img.info['compression'] != 'group4': - raise ValueError("Image not compressed with CCITT Group 4 but with: %s" - % img.info['compression']) + if img.info["compression"] != "group4": + raise ValueError( + "Image not compressed with CCITT Group 4 but with: %s" + % img.info["compression"] + ) # Read the TIFF tags to find the offset(s) of the compressed data strips. strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS] strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] - rows_per_strip = img.tag_v2.get(TiffImagePlugin.ROWSPERSTRIP, 2**32 - 1) + rows_per_strip = img.tag_v2.get(TiffImagePlugin.ROWSPERSTRIP, 2 ** 32 - 1) # PIL always seems to create a single strip even for very large TIFFs when # it saves images, so assume we only have to read a single strip. 
@@ -745,7 +1092,7 @@ def ccitt_payload_location_from_pil(img): if len(strip_offsets) != 1 or len(strip_bytes) != 1: raise NotImplementedError("Transcoding multiple strips not supported") - (offset, ), (length, ) = strip_offsets, strip_bytes + (offset,), (length,) = strip_offsets, strip_bytes logging.debug("TIFF strip_offsets: %d" % offset) logging.debug("TIFF strip_bytes: %d" % length) @@ -767,7 +1114,7 @@ def transcode_monochrome(imgdata): # killed by a SIGABRT: # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46 im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes()) - im.save(newimgio, format='TIFF', compression='group4') + im.save(newimgio, format="TIFF", compression="group4") # Open new image in memory newimgio.seek(0) @@ -785,12 +1132,12 @@ def parse_png(rawdata): i = 16 while i < len(rawdata): # once we can require Python >= 3.2 we can use int.from_bytes() instead - n, = struct.unpack('>I', rawdata[i-8:i-4]) + n, = struct.unpack(">I", rawdata[i - 8 : i - 4]) if i + n > len(rawdata): raise Exception("invalid png: %d %d %d" % (i, n, len(rawdata))) - if rawdata[i-4:i] == b"IDAT": - pngidat += rawdata[i:i+n] - elif rawdata[i-4:i] == b"PLTE": + if rawdata[i - 4 : i] == b"IDAT": + pngidat += rawdata[i : i + n] + elif rawdata[i - 4 : i] == b"PLTE": # This could be as simple as saying "palette = rawdata[i:i+n]" but # pdfrw does only escape parenthesis and backslashes in the raw # byte stream. But raw carriage return bytes are interpreted as @@ -800,10 +1147,10 @@ def parse_png(rawdata): # (and escape more bytes) but since we cannot use pdfrw anyways, # we choose the more human readable variant. 
# See https://github.com/pmaupin/pdfrw/issues/147 - for j in range(i, i+n, 3): + for j in range(i, i + n, 3): # with int.from_bytes() we would not have to prepend extra # zeroes - color, = struct.unpack('>I', b'\x00'+rawdata[j:j+3]) + color, = struct.unpack(">I", b"\x00" + rawdata[j : j + 3]) palette.append(color) i += n i += 12 @@ -819,8 +1166,10 @@ def read_images(rawdata, colorspace, first_frame_only=False): except IOError as e: # test if it is a jpeg2000 image if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": - raise ImageOpenError("cannot read input image (not jpeg2000). " - "PIL: error reading image: %s" % e) + raise ImageOpenError( + "cannot read input image (not jpeg2000). " + "PIL: error reading image: %s" % e + ) # image is jpeg2000 imgformat = ImageFormat.JPEG2000 else: @@ -838,18 +1187,31 @@ def read_images(rawdata, colorspace, first_frame_only=False): # JPEG and JPEG2000 can be embedded into the PDF as-is if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: - color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata) - if color == Colorspace['1']: + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata + ) + if color == Colorspace["1"]: raise JpegColorspaceError("jpeg can't be monochrome") - if color == Colorspace['P']: + if color == Colorspace["P"]: raise JpegColorspaceError("jpeg can't have a color palette") - if color == Colorspace['RGBA']: + if color == Colorspace["RGBA"]: raise JpegColorspaceError("jpeg can't have an alpha channel") im.close() logging.debug("read_images() embeds a JPEG") - return [(color, ndpi, imgformat, rawdata, imgwidthpx, imgheightpx, [], - False, 8)] + return [ + ( + color, + ndpi, + imgformat, + rawdata, + imgwidthpx, + imgheightpx, + [], + False, + 8, + rotation, + ) + ] # We can directly embed the IDAT chunk of PNG images if the PNG is not # interlaced @@ -859,8 
+1221,9 @@ def read_images(rawdata, colorspace, first_frame_only=False): # IHDR chunk. We know where to find that in the file because the IHDR chunk # must be the first chunk. if imgformat == ImageFormat.PNG and rawdata[28] == 0: - color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata) + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata + ) pngidat, palette = parse_png(rawdata) im.close() # PIL does not provide the information about the original bits per @@ -871,8 +1234,20 @@ def read_images(rawdata, colorspace, first_frame_only=False): if depth not in [1, 2, 4, 8, 16]: raise ValueError("invalid bit depth: %d" % depth) logging.debug("read_images() embeds a PNG") - return [(color, ndpi, imgformat, pngidat, imgwidthpx, imgheightpx, - palette, False, depth)] + return [ + ( + color, + ndpi, + imgformat, + pngidat, + imgwidthpx, + imgheightpx, + palette, + False, + depth, + rotation, + ) + ] # If our input is not JPEG or PNG, then we might have a format that # supports multiple frames (like TIFF or GIF), so we need a loop to @@ -905,24 +1280,31 @@ def read_images(rawdata, colorspace, first_frame_only=False): # # Some tiff images do not have BITSPERSAMPLE set. 
Use this to create # such a tiff: tiffset -u 258 test.tif - if imgformat == ImageFormat.TIFF \ - and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8: + if ( + imgformat == ImageFormat.TIFF + and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8 + ): raise ValueError("PIL is unable to preserve more than 8 bits per sample") # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it # only contains a single strip - if imgformat == ImageFormat.TIFF \ - and imgdata.info['compression'] == "group4" \ - and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1: + if ( + imgformat == ImageFormat.TIFF + and imgdata.info["compression"] == "group4" + and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1 + ): photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] inverted = False if photo == 0: inverted = True elif photo != 1: - raise ValueError("unsupported photometric interpretation for " - "group4 tiff: %d" % photo) - color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata) + raise ValueError( + "unsupported photometric interpretation for " + "group4 tiff: %d" % photo + ) + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata + ) offset, length = ccitt_payload_location_from_pil(imgdata) im.seek(offset) rawdata = im.read(length) @@ -943,45 +1325,85 @@ def read_images(rawdata, colorspace, first_frame_only=False): else: raise ValueError("unsupported FillOrder: %d" % fillorder) logging.debug("read_images() embeds Group4 from TIFF") - result.append((color, ndpi, ImageFormat.CCITTGroup4, rawdata, - imgwidthpx, imgheightpx, [], inverted, 1)) + result.append( + ( + color, + ndpi, + ImageFormat.CCITTGroup4, + rawdata, + imgwidthpx, + imgheightpx, + [], + inverted, + 1, + rotation, + ) + ) img_page_count += 1 continue logging.debug("Converting frame: %d" % img_page_count) - color, ndpi, imgwidthpx, 
imgheightpx = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace) + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace + ) newimg = None - if color == Colorspace['1']: + if color == Colorspace["1"]: try: ccittdata = transcode_monochrome(imgdata) - logging.debug( - "read_images() encoded a B/W image as CCITT group 4") - result.append((color, ndpi, ImageFormat.CCITTGroup4, ccittdata, - imgwidthpx, imgheightpx, [], False, 1)) + logging.debug("read_images() encoded a B/W image as CCITT group 4") + result.append( + ( + color, + ndpi, + ImageFormat.CCITTGroup4, + ccittdata, + imgwidthpx, + imgheightpx, + [], + False, + 1, + rotation, + ) + ) img_page_count += 1 continue except Exception as e: logging.debug(e) logging.debug("Converting colorspace 1 to L") - newimg = imgdata.convert('L') + newimg = imgdata.convert("L") color = Colorspace.L - elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, - Colorspace["CMYK;I"], Colorspace.P]: + elif color in [ + Colorspace.RGB, + Colorspace.L, + Colorspace.CMYK, + Colorspace["CMYK;I"], + Colorspace.P, + ]: logging.debug("Colorspace is OK: %s", color) newimg = imgdata else: - raise ValueError("unknown or unsupported colorspace: %s" - % color.name) + raise ValueError("unknown or unsupported colorspace: %s" % color.name) # the PNG format does not support CMYK, so we fall back to normal # compression if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: imggz = zlib.compress(newimg.tobytes()) logging.debug("read_images() encoded CMYK with flate compression") - result.append((color, ndpi, imgformat, imggz, imgwidthpx, - imgheightpx, [], False, 8)) + result.append( + ( + color, + ndpi, + imgformat, + imggz, + imgwidthpx, + imgheightpx, + [], + False, + 8, + rotation, + ) + ) else: # cheapo version to retrieve a PNG encoding of the payload is to # just save it with PIL. 
In the future this could be replaced by @@ -998,8 +1420,20 @@ def read_images(rawdata, colorspace, first_frame_only=False): if depth not in [1, 2, 4, 8, 16]: raise ValueError("invalid bit depth: %d" % depth) logging.debug("read_images() encoded an image as PNG") - result.append((color, ndpi, ImageFormat.PNG, pngidat, imgwidthpx, - imgheightpx, palette, False, depth)) + result.append( + ( + color, + ndpi, + ImageFormat.PNG, + pngidat, + imgwidthpx, + imgheightpx, + palette, + False, + depth, + rotation, + ) + ) img_page_count += 1 # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method @@ -1013,71 +1447,80 @@ def read_images(rawdata, colorspace, first_frame_only=False): # converts a length in pixels to a length in PDF units (1/72 of an inch) def px_to_pt(length, dpi): - return 72.0*length/dpi + return 72.0 * length / dpi def cm_to_pt(length): - return (72.0*length)/2.54 + return (72.0 * length) / 2.54 def mm_to_pt(length): - return (72.0*length)/25.4 + return (72.0 * length) / 25.4 def in_to_pt(length): - return 72.0*length + return 72.0 * length -def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None, - auto_orient=False): +def get_layout_fun( + pagesize=None, imgsize=None, border=None, fit=None, auto_orient=False +): def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight): if fitwidth is None and fitheight is None: raise ValueError("fitwidth and fitheight cannot both be None") # if fit is fill or enlarge then it is okay if one of the dimensions # are negative but one of them must still be positive # if fit is not fill or enlarge then both dimensions must be positive - if fit in [FitMode.fill, FitMode.enlarge] and \ - fitwidth is not None and fitwidth < 0 and \ - fitheight is not None and fitheight < 0: - raise ValueError("cannot fit into a rectangle where both " - "dimensions are negative") - elif fit not in [FitMode.fill, FitMode.enlarge] and \ - ((fitwidth is not None and fitwidth < 0) or - (fitheight is not None and 
fitheight < 0)): - raise Exception("cannot fit into a rectangle where either " - "dimensions are negative") + if ( + fit in [FitMode.fill, FitMode.enlarge] + and fitwidth is not None + and fitwidth < 0 + and fitheight is not None + and fitheight < 0 + ): + raise ValueError( + "cannot fit into a rectangle where both " "dimensions are negative" + ) + elif fit not in [FitMode.fill, FitMode.enlarge] and ( + (fitwidth is not None and fitwidth < 0) + or (fitheight is not None and fitheight < 0) + ): + raise Exception( + "cannot fit into a rectangle where either " "dimensions are negative" + ) def default(): if fitwidth is not None and fitheight is not None: newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight)/imgwidth + newimgheight = (newimgwidth * imgheight) / imgwidth if newimgheight > fitheight: newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth)/imgheight + newimgwidth = (newimgheight * imgwidth) / imgheight elif fitwidth is None and fitheight is not None: newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth)/imgheight + newimgwidth = (newimgheight * imgwidth) / imgheight elif fitheight is None and fitwidth is not None: newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight)/imgwidth + newimgheight = (newimgwidth * imgheight) / imgwidth else: raise ValueError("fitwidth and fitheight cannot both be None") return newimgwidth, newimgheight + if fit is None or fit == FitMode.into: return default() elif fit == FitMode.fill: if fitwidth is not None and fitheight is not None: newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight)/imgwidth + newimgheight = (newimgwidth * imgheight) / imgwidth if newimgheight < fitheight: newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth)/imgheight + newimgwidth = (newimgheight * imgwidth) / imgheight elif fitwidth is None and fitheight is not None: newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth)/imgheight + newimgwidth = (newimgheight * 
imgwidth) / imgheight elif fitheight is None and fitwidth is not None: newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight)/imgwidth + newimgheight = (newimgwidth * imgheight) / imgwidth else: raise ValueError("fitwidth and fitheight cannot both be None") return newimgwidth, newimgheight @@ -1086,10 +1529,10 @@ def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None, return fitwidth, fitheight elif fitwidth is None and fitheight is not None: newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth)/imgheight + newimgwidth = (newimgheight * imgwidth) / imgheight elif fitheight is None and fitwidth is not None: newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight)/imgwidth + newimgheight = (newimgwidth * imgheight) / imgwidth else: raise ValueError("fitwidth and fitheight cannot both be None") return newimgwidth, newimgheight @@ -1121,17 +1564,20 @@ def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None, return default() else: raise NotImplementedError + # if no layout arguments are given, then the image size is equal to the # page size and will be drawn with the default dpi if pagesize is None and imgsize is None and border is None: return default_layout_fun if pagesize is None and imgsize is None and border is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0]) imgheightpdf = px_to_pt(imgheightpx, ndpi[1]) - pagewidth = imgwidthpdf+2*border[1] - pageheight = imgheightpdf+2*border[0] + pagewidth = imgwidthpdf + 2 * border[1] + pageheight = imgheightpdf + 2 * border[0] return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun if border is None: border = (0, 0) @@ -1139,46 +1585,62 @@ def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None, # will be calculated from the pagesize, taking into account the border # and the fitting if pagesize is not None and imgsize is None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): - if 
pagesize[0] is not None and pagesize[1] is not None and \ - auto_orient and \ - ((imgwidthpx > imgheightpx and - pagesize[0] < pagesize[1]) or - (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])): + if ( + pagesize[0] is not None + and pagesize[1] is not None + and auto_orient + and ( + (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1]) + or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1]) + ) + ): pagewidth, pageheight = pagesize[1], pagesize[0] newborder = border[1], border[0] else: pagewidth, pageheight = pagesize[0], pagesize[1] newborder = border if pagewidth is not None: - fitwidth = pagewidth-2*newborder[1] + fitwidth = pagewidth - 2 * newborder[1] else: fitwidth = None if pageheight is not None: - fitheight = pageheight-2*newborder[0] + fitheight = pageheight - 2 * newborder[0] else: fitheight = None - if fit in [FitMode.fill, FitMode.enlarge] and \ - fitwidth is not None and fitwidth < 0 and \ - fitheight is not None and fitheight < 0: + if ( + fit in [FitMode.fill, FitMode.enlarge] + and fitwidth is not None + and fitwidth < 0 + and fitheight is not None + and fitheight < 0 + ): raise NegativeDimensionError( "at least one border dimension musts be smaller than half " - "the respective page dimension") - elif fit not in [FitMode.fill, FitMode.enlarge] \ - and ((fitwidth is not None and fitwidth < 0) or - (fitheight is not None and fitheight < 0)): + "the respective page dimension" + ) + elif fit not in [FitMode.fill, FitMode.enlarge] and ( + (fitwidth is not None and fitwidth < 0) + or (fitheight is not None and fitheight < 0) + ): raise NegativeDimensionError( "one border dimension is larger than half of the " - "respective page dimension") - imgwidthpdf, imgheightpdf = \ - fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - fitwidth, fitheight) + "respective page dimension" + ) + imgwidthpdf, imgheightpdf = fitfun( + fit, + px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + fitwidth, + 
fitheight, + ) if pagewidth is None: - pagewidth = imgwidthpdf+border[1]*2 + pagewidth = imgwidthpdf + border[1] * 2 if pageheight is None: - pageheight = imgheightpdf+border[0]*2 + pageheight = imgheightpdf + border[0] * 2 return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun def scale_imgsize(s, px, dpi): @@ -1188,37 +1650,50 @@ def get_layout_fun(pagesize=None, imgsize=None, border=None, fit=None, if mode == ImgSize.abs: return value if mode == ImgSize.perc: - return (px_to_pt(px, dpi)*value)/100 + return (px_to_pt(px, dpi) * value) / 100 if mode == ImgSize.dpi: return px_to_pt(px, value) raise NotImplementedError + if pagesize is None and imgsize is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): - imgwidthpdf, imgheightpdf = \ - fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), - scale_imgsize(imgsize[1], imgheightpx, ndpi[1])) - pagewidth = imgwidthpdf+2*border[1] - pageheight = imgheightpdf+2*border[0] + imgwidthpdf, imgheightpdf = fitfun( + fit, + px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), + scale_imgsize(imgsize[1], imgheightpx, ndpi[1]), + ) + pagewidth = imgwidthpdf + 2 * border[1] + pageheight = imgheightpdf + 2 * border[0] return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun if pagesize is not None and imgsize is not None: + def layout_fun(imgwidthpx, imgheightpx, ndpi): - if pagesize[0] is not None and pagesize[1] is not None and \ - auto_orient and \ - ((imgwidthpx > imgheightpx and - pagesize[0] < pagesize[1]) or - (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])): + if ( + pagesize[0] is not None + and pagesize[1] is not None + and auto_orient + and ( + (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1]) + or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1]) + ) + ): pagewidth, pageheight = pagesize[1], pagesize[0] else: 
pagewidth, pageheight = pagesize[0], pagesize[1] - imgwidthpdf, imgheightpdf = \ - fitfun(fit, px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), - scale_imgsize(imgsize[1], imgheightpx, ndpi[1])) + imgwidthpdf, imgheightpdf = fitfun( + fit, + px_to_pt(imgwidthpx, ndpi[0]), + px_to_pt(imgheightpx, ndpi[1]), + scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), + scale_imgsize(imgsize[1], imgheightpx, ndpi[1]), + ) return pagewidth, pageheight, imgwidthpdf, imgheightpdf + return layout_fun raise NotImplementedError @@ -1235,8 +1710,10 @@ def get_fixed_dpi_layout_fun(fixed_dpi): >>> layout_fun = get_fixed_dpi_layout_fun((300, 300)) >>> convert(image1, layout_fun=layout_fun, ... outputstream=...) """ + def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi): return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi) + return fixed_dpi_layout_fun @@ -1262,27 +1739,52 @@ def convert(*images, **kwargs): _default_kwargs = dict( title=None, - author=None, creator=None, producer=None, creationdate=None, - moddate=None, subject=None, keywords=None, colorspace=None, - nodate=False, layout_fun=default_layout_fun, viewer_panes=None, - viewer_initial_page=None, viewer_magnification=None, - viewer_page_layout=None, viewer_fit_window=False, - viewer_center_window=False, viewer_fullscreen=False, - with_pdfrw=True, outputstream=None, first_frame_only=False, - allow_oversized=True) + author=None, + creator=None, + producer=None, + creationdate=None, + moddate=None, + subject=None, + keywords=None, + colorspace=None, + nodate=False, + layout_fun=default_layout_fun, + viewer_panes=None, + viewer_initial_page=None, + viewer_magnification=None, + viewer_page_layout=None, + viewer_fit_window=False, + viewer_center_window=False, + viewer_fullscreen=False, + with_pdfrw=True, + outputstream=None, + first_frame_only=False, + allow_oversized=True, + ) for kwname, default in _default_kwargs.items(): if kwname not in kwargs: 
kwargs[kwname] = default pdf = pdfdoc( "1.3", - kwargs['title'], kwargs['author'], kwargs['creator'], - kwargs['producer'], kwargs['creationdate'], kwargs['moddate'], - kwargs['subject'], kwargs['keywords'], kwargs['nodate'], - kwargs['viewer_panes'], kwargs['viewer_initial_page'], - kwargs['viewer_magnification'], kwargs['viewer_page_layout'], - kwargs['viewer_fit_window'], kwargs['viewer_center_window'], - kwargs['viewer_fullscreen'], kwargs['with_pdfrw']) + kwargs["title"], + kwargs["author"], + kwargs["creator"], + kwargs["producer"], + kwargs["creationdate"], + kwargs["moddate"], + kwargs["subject"], + kwargs["keywords"], + kwargs["nodate"], + kwargs["viewer_panes"], + kwargs["viewer_initial_page"], + kwargs["viewer_magnification"], + kwargs["viewer_page_layout"], + kwargs["viewer_fit_window"], + kwargs["viewer_center_window"], + kwargs["viewer_fullscreen"], + kwargs["with_pdfrw"], + ) # backwards compatibility with older img2pdf versions where the first # argument to the function had to be given as a list @@ -1301,8 +1803,7 @@ def convert(*images, **kwargs): rawdata = img.read() except AttributeError: if not isinstance(img, (str, bytes)): - raise TypeError( - "Neither implements read() nor is str or bytes") + raise TypeError("Neither implements read() nor is str or bytes") # the thing doesn't have a read() function, so try if we can treat # it as a file name try: @@ -1314,18 +1815,29 @@ def convert(*images, **kwargs): # name so we now try treating it as raw image content rawdata = img - for color, ndpi, imgformat, imgdata, imgwidthpx, imgheightpx, \ - palette, inverted, depth in read_images( - rawdata, kwargs['colorspace'], kwargs['first_frame_only']): - pagewidth, pageheight, imgwidthpdf, imgheightpdf = \ - kwargs['layout_fun'](imgwidthpx, imgheightpx, ndpi) + for ( + color, + ndpi, + imgformat, + imgdata, + imgwidthpx, + imgheightpx, + palette, + inverted, + depth, + rotation, + ) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]): + 
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"]( + imgwidthpx, imgheightpx, ndpi + ) userunit = None if pagewidth < 3.00 or pageheight < 3.00: - logging.warning("pdf width or height is below 3.00 - too " - "small for some viewers!") + logging.warning( + "pdf width or height is below 3.00 - too " "small for some viewers!" + ) elif pagewidth > 14400.0 or pageheight > 14400.0: - if kwargs['allow_oversized']: + if kwargs["allow_oversized"]: userunit = find_scale(pagewidth, pageheight) pagewidth /= userunit pageheight /= userunit @@ -1333,24 +1845,39 @@ def convert(*images, **kwargs): imgheightpdf /= userunit else: raise PdfTooLargeError( - "pdf width or height must not exceed 200 inches.") + "pdf width or height must not exceed 200 inches." + ) # the image is always centered on the page - imgxpdf = (pagewidth - imgwidthpdf)/2.0 - imgypdf = (pageheight - imgheightpdf)/2.0 - pdf.add_imagepage(color, imgwidthpx, imgheightpx, imgformat, - imgdata, imgwidthpdf, imgheightpdf, imgxpdf, - imgypdf, pagewidth, pageheight, userunit, - palette, inverted, depth) - - if kwargs['outputstream']: - pdf.tostream(kwargs['outputstream']) + imgxpdf = (pagewidth - imgwidthpdf) / 2.0 + imgypdf = (pageheight - imgheightpdf) / 2.0 + pdf.add_imagepage( + color, + imgwidthpx, + imgheightpx, + imgformat, + imgdata, + imgwidthpdf, + imgheightpdf, + imgxpdf, + imgypdf, + pagewidth, + pageheight, + userunit, + palette, + inverted, + depth, + rotation, + ) + + if kwargs["outputstream"]: + pdf.tostream(kwargs["outputstream"]) return return pdf.tostring() def parse_num(num, name): - if num == '': + if num == "": return None unit = None if num.endswith("pt"): @@ -1365,8 +1892,10 @@ def parse_num(num, name): try: num = float(num) except ValueError: - msg = "%s is not a floating point number and doesn't have a " \ - "valid unit: %s" % (name, num) + msg = ( + "%s is not a floating point number and doesn't have a " + "valid unit: %s" % (name, num) + ) raise 
argparse.ArgumentTypeError(msg) if unit is None: unit = Unit.pt @@ -1387,7 +1916,7 @@ def parse_num(num, name): def parse_imgsize_num(num, name): - if num == '': + if num == "": return None unit = None if num.endswith("pt"): @@ -1406,8 +1935,10 @@ def parse_imgsize_num(num, name): try: num = float(num) except ValueError: - msg = "%s is not a floating point number and doesn't have a " \ - "valid unit: %s" % (name, num) + msg = ( + "%s is not a floating point number and doesn't have a " + "valid unit: %s" % (name, num) + ) raise argparse.ArgumentTypeError(msg) if unit is None: unit = ImgUnit.pt @@ -1445,20 +1976,19 @@ def parse_pagesize_rectarg(string): string = string[:-2] if papersizes.get(string.lower()): string = papersizes[string.lower()] - if 'x' not in string: + if "x" not in string: # if there is no separating "x" in the string, then the string is # interpreted as the width w = parse_num(string, "width") h = None else: - w, h = string.split('x', 1) + w, h = string.split("x", 1) w = parse_num(w, "width") h = parse_num(h, "height") if transposed: w, h = h, w if w is None and h is None: - raise argparse.ArgumentTypeError("at least one dimension must be " - "specified") + raise argparse.ArgumentTypeError("at least one dimension must be " "specified") return w, h @@ -1468,20 +1998,19 @@ def parse_imgsize_rectarg(string): string = string[:-2] if papersizes.get(string.lower()): string = papersizes[string.lower()] - if 'x' not in string: + if "x" not in string: # if there is no separating "x" in the string, then the string is # interpreted as the width w = parse_imgsize_num(string, "width") h = None else: - w, h = string.split('x', 1) + w, h = string.split("x", 1) w = parse_imgsize_num(w, "width") h = parse_imgsize_num(h, "height") if transposed: w, h = h, w if w is None and h is None: - raise argparse.ArgumentTypeError("at least one dimension must be " - "specified") + raise argparse.ArgumentTypeError("at least one dimension must be " "specified") return w, h @@ 
-1490,19 +2019,20 @@ def parse_colorspacearg(string): if c.name == string: return c allowed = ", ".join([c.name for c in Colorspace]) - raise argparse.ArgumentTypeError("Unsupported colorspace: %s. Must be one " - "of: %s." % (string, allowed)) + raise argparse.ArgumentTypeError( + "Unsupported colorspace: %s. Must be one " "of: %s." % (string, allowed) + ) def parse_borderarg(string): - if ':' in string: - h, v = string.split(':', 1) - if h == '': + if ":" in string: + h, v = string.split(":", 1) + if h == "": raise argparse.ArgumentTypeError("missing value before colon") - if v == '': + if v == "": raise argparse.ArgumentTypeError("missing value after colon") else: - if string == '': + if string == "": raise argparse.ArgumentTypeError("border option cannot be empty") h, v = string, string h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border") @@ -1512,28 +2042,41 @@ def parse_borderarg(string): def input_images(path): - if path == '-': + if path == "-": # we slurp in all data from stdin because we need to seek in it later - result = sys.stdin.buffer.read() + if PY3: + result = sys.stdin.buffer.read() + else: + result = sys.stdin.read() if len(result) == 0: - raise argparse.ArgumentTypeError("\"%s\" is empty" % path) + raise argparse.ArgumentTypeError('"%s" is empty' % path) else: - try: - if os.path.getsize(path) == 0: - raise argparse.ArgumentTypeError("\"%s\" is empty" % path) - # test-read a byte from it so that we can abort early in case - # we cannot read data from the file - with open(path, "rb") as im: - im.read(1) - except IsADirectoryError: - raise argparse.ArgumentTypeError( - "\"%s\" is a directory" % path) - except PermissionError: - raise argparse.ArgumentTypeError( - "\"%s\" permission denied" % path) - except FileNotFoundError: - raise argparse.ArgumentTypeError( - "\"%s\" does not exist" % path) + if PY3: + try: + if os.path.getsize(path) == 0: + raise argparse.ArgumentTypeError('"%s" is empty' % path) + # test-read a byte 
from it so that we can abort early in case + # we cannot read data from the file + with open(path, "rb") as im: + im.read(1) + except IsADirectoryError: + raise argparse.ArgumentTypeError('"%s" is a directory' % path) + except PermissionError: + raise argparse.ArgumentTypeError('"%s" permission denied' % path) + except FileNotFoundError: + raise argparse.ArgumentTypeError('"%s" does not exist' % path) + else: + try: + if os.path.getsize(path) == 0: + raise argparse.ArgumentTypeError('"%s" is empty' % path) + # test-read a byte from it so that we can abort early in case + # we cannot read data from the file + with open(path, "rb") as im: + im.read(1) + except IOError as err: + raise argparse.ArgumentTypeError(str(err)) + except OSError as err: + raise argparse.ArgumentTypeError(str(err)) result = path return result @@ -1550,8 +2093,9 @@ def parse_panes(string): if m.name == string.lower(): return m allowed = ", ".join([m.name for m in PageMode]) - raise argparse.ArgumentTypeError("Unsupported page mode: %s. Must be one " - "of: %s." % (string, allowed)) + raise argparse.ArgumentTypeError( + "Unsupported page mode: %s. Must be one " "of: %s." % (string, allowed) + ) def parse_magnification(string): @@ -1563,9 +2107,10 @@ def parse_magnification(string): except ValueError: pass allowed = ", ".join([m.name for m in Magnification]) - raise argparse.ArgumentTypeError("Unsupported magnification: %s. Must be " - "a floating point number or one of: %s." % - (string, allowed)) + raise argparse.ArgumentTypeError( + "Unsupported magnification: %s. Must be " + "a floating point number or one of: %s." % (string, allowed) + ) def parse_layout(string): @@ -1573,8 +2118,9 @@ def parse_layout(string): if l.name == string.lower(): return l allowed = ", ".join([l.name for l in PageLayout]) - raise argparse.ArgumentTypeError("Unsupported page layout: %s. Must be " - "one of: %s." % (string, allowed)) + raise argparse.ArgumentTypeError( + "Unsupported page layout: %s. 
Must be " "one of: %s." % (string, allowed) + ) def valid_date(string): @@ -1616,14 +2162,14 @@ def valid_date(string): raise argparse.ArgumentTypeError("cannot parse date: %s" % string) -def main(): +def main(argv=sys.argv): rendered_papersizes = "" for k, v in sorted(papersizes.items()): rendered_papersizes += " %-8s %s\n" % (papernames[k], v) parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description='''\ + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""\ Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000 images with the only added file size coming from the PDF container itself. @@ -1637,8 +2183,8 @@ or to another program as part of a shell pipe. To directly write the output into a file, use the -o or --output option. Options: -''', - epilog='''\ +""", + epilog="""\ Colorspace: Currently, the colorspace must be forced for JPEG 2000 images that are not in the RGB colorspace. Available colorspace options are based on Python Imaging @@ -1745,78 +2291,113 @@ Examples: Written by Johannes 'josch' Schauer Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues -''' % rendered_papersizes) +""" + % rendered_papersizes, + ) parser.add_argument( - 'images', metavar='infile', type=input_images, nargs='*', - help='Specifies the input file(s) in any format that can be read by ' - 'the Python Imaging Library (PIL). If no input images are given, then ' + "images", + metavar="infile", + type=input_images, + nargs="*", + help="Specifies the input file(s) in any format that can be read by " + "the Python Imaging Library (PIL). If no input images are given, then " 'a single image is read from standard input. The special filename "-" ' - 'can be used once to read an image from standard input. To read a ' + "can be used once to read an image from standard input. 
To read a " 'file in the current directory with the filename "-", pass it to ' - 'img2pdf by explicitly stating its relative path like "./-".') + 'img2pdf by explicitly stating its relative path like "./-".', + ) parser.add_argument( - '-v', '--verbose', action="store_true", - help='Makes the program operate in verbose mode, printing messages on ' - 'standard error.') + "-v", + "--verbose", + action="store_true", + help="Makes the program operate in verbose mode, printing messages on " + "standard error.", + ) parser.add_argument( - '-V', '--version', action='version', version='%(prog)s '+__version__, - help="Prints version information and exits.") + "-V", + "--version", + action="version", + version="%(prog)s " + __version__, + help="Prints version information and exits.", + ) outargs = parser.add_argument_group( - title='General output arguments', - description='Arguments controlling the output format.') - + title="General output arguments", + description="Arguments controlling the output format.", + ) + + # In Python3 we have to output to sys.stdout.buffer because we write are + # bytes and not strings. In certain situations, like when the main + # function is wrapped by contextlib.redirect_stdout(), sys.stdout does not + # have the buffer attribute. Thus we write to sys.stdout by default and + # to sys.stdout.buffer if it exists. outargs.add_argument( - '-o', '--output', metavar='out', type=argparse.FileType('wb'), - default=sys.stdout.buffer, - help='Makes the program output to a file instead of standard output.') + "-o", + "--output", + metavar="out", + type=argparse.FileType("wb"), + default=sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout, + help="Makes the program output to a file instead of standard output.", + ) outargs.add_argument( - '-C', '--colorspace', metavar='colorspace', type=parse_colorspacearg, - help=''' + "-C", + "--colorspace", + metavar="colorspace", + type=parse_colorspacearg, + help=""" Forces the PIL colorspace. 
See the epilogue for a list of possible values. Usually the PDF colorspace would be derived from the color space of the input image. This option overwrites the automatically detected colorspace from the input image and thus forces a certain colorspace in the output PDF /ColorSpace property. This is useful for JPEG 2000 images with a different colorspace than -RGB.''') +RGB.""", + ) outargs.add_argument( - '-D', '--nodate', action="store_true", - help='Suppresses timestamps in the output and thus makes the output ' - 'deterministic between individual runs. You can also manually ' - 'set a date using the --moddate and --creationdate options.') + "-D", + "--nodate", + action="store_true", + help="Suppresses timestamps in the output and thus makes the output " + "deterministic between individual runs. You can also manually " + "set a date using the --moddate and --creationdate options.", + ) outargs.add_argument( - "--without-pdfrw", action="store_true", + "--without-pdfrw", + action="store_true", help="By default, img2pdf uses the pdfrw library to create the output " - "PDF if pdfrw is available. If you want to use the internal PDF " - "generator of img2pdf even if pdfrw is present, then pass this " - "option. This can be useful if you want to have unicode metadata " - "values which pdfrw does not yet support (See " - "https://github.com/pmaupin/pdfrw/issues/39) or if you want the " - "PDF code to be more human readable.") + "PDF if pdfrw is available. If you want to use the internal PDF " + "generator of img2pdf even if pdfrw is present, then pass this " + "option. 
This can be useful if you want to have unicode metadata " + "values which pdfrw does not yet support (See " + "https://github.com/pmaupin/pdfrw/issues/39) or if you want the " + "PDF code to be more human readable.", + ) outargs.add_argument( - "--first-frame-only", action="store_true", + "--first-frame-only", + action="store_true", help="By default, img2pdf will convert multi-frame images like " - "multi-page TIFF or animated GIF images to one page per frame. " - "This option will only let the first frame of every multi-frame " - "input image be converted into a page in the resulting PDF." - ) + "multi-page TIFF or animated GIF images to one page per frame. " + "This option will only let the first frame of every multi-frame " + "input image be converted into a page in the resulting PDF.", + ) outargs.add_argument( - "--pillow-limit-break", action="store_true", + "--pillow-limit-break", + action="store_true", help="img2pdf uses the Python Imaging Library Pillow to read input " - "images. Pillow limits the maximum input image size to %d pixels " - "to prevent decompression bomb denial of service attacks. If " - "your input image contains more pixels than that, use this " - "option to disable this safety measure during this run of img2pdf" - % Image.MAX_IMAGE_PIXELS) + "images. Pillow limits the maximum input image size to %d pixels " + "to prevent decompression bomb denial of service attacks. If " + "your input image contains more pixels than that, use this " + "option to disable this safety measure during this run of img2pdf" + % Image.MAX_IMAGE_PIXELS, + ) sizeargs = parser.add_argument_group( - title='Image and page size and layout arguments', - description='''\ + title="Image and page size and layout arguments", + description="""\ Every input image will be placed on its own page. The image size is controlled by the dpi value of the input image or, if unset or missing, the default dpi of %.2f. By default, each page will have the same size as the image it shows. 
@@ -1858,26 +2439,39 @@ the image size will be calculated from the page size, respecting the border setting. If the --border option is given while both the --pagesize and --imgsize options are passed, then the --border option will be ignored. -''' % default_dpi) +""" + % default_dpi, + ) sizeargs.add_argument( - '-S', '--pagesize', metavar='LxL', type=parse_pagesize_rectarg, - help=''' + "-S", + "--pagesize", + metavar="LxL", + type=parse_pagesize_rectarg, + help=""" Sets the size of the PDF pages. The short-option is the upper case S because -it is an mnemonic for being bigger than the image size.''') +it is an mnemonic for being bigger than the image size.""", + ) sizeargs.add_argument( - '-s', '--imgsize', metavar='LxL', type=parse_imgsize_rectarg, - help=''' + "-s", + "--imgsize", + metavar="LxL", + type=parse_imgsize_rectarg, + help=""" Sets the size of the images on the PDF pages. In addition, the unit dpi is allowed which will set the image size as a value of dots per inch. Instead of a unit, width and height values may also have a percentage sign appended, indicating a resize of the image by that percentage. The short-option is the lower case s because it is an mnemonic for being smaller than the page size. -''') +""", + ) sizeargs.add_argument( - '-b', '--border', metavar='L[:L]', type=parse_borderarg, - help=''' + "-b", + "--border", + metavar="L[:L]", + type=parse_borderarg, + help=""" Specifies the minimal distance between the image border and the PDF page border. This value Is overwritten by explicit values set by --pagesize or --imgsize. The value will be used when calculating page dimensions from the @@ -1886,103 +2480,144 @@ as an argument, separated by a colon. One value specifies the minimal border on all four sides. Two values specify the minimal border on the top/bottom and left/right, respectively. It is not possible to specify asymmetric borders because images will always be centered on the page. 
-''') +""", + ) sizeargs.add_argument( - '-f', '--fit', metavar='FIT', type=parse_fitarg, - default=FitMode.into, help=''' + "-f", + "--fit", + metavar="FIT", + type=parse_fitarg, + default=FitMode.into, + help=""" If --imgsize is given, fits the image using these dimensions. Otherwise, fit the image into the dimensions given by --pagesize. FIT is one of into, fill, exact, shrink and enlarge. The default value is "into". See the epilogue at the bottom for a description of the FIT options. -''') +""", + ) sizeargs.add_argument( - '-a', '--auto-orient', action="store_true", - help=''' + "-a", + "--auto-orient", + action="store_true", + help=""" If both dimensions of the page are given via --pagesize, conditionally swaps these dimensions such that the page orientation is the same as the orientation of the input image. If the orientation of a page gets flipped, then so do the values set via the --border option. -''') +""", + ) metaargs = parser.add_argument_group( - title='Arguments setting metadata', - description='Options handling embedded timestamps, title and author ' - 'information.') + title="Arguments setting metadata", + description="Options handling embedded timestamps, title and author " + "information.", + ) metaargs.add_argument( - '--title', metavar='title', type=str, - help='Sets the title metadata value') + "--title", metavar="title", type=str, help="Sets the title metadata value" + ) metaargs.add_argument( - '--author', metavar='author', type=str, - help='Sets the author metadata value') + "--author", metavar="author", type=str, help="Sets the author metadata value" + ) metaargs.add_argument( - '--creator', metavar='creator', type=str, - help='Sets the creator metadata value') + "--creator", metavar="creator", type=str, help="Sets the creator metadata value" + ) metaargs.add_argument( - '--producer', metavar='producer', type=str, + "--producer", + metavar="producer", + type=str, default="img2pdf " + __version__, - help='Sets the producer metadata value 
' - '(default is: img2pdf ' + __version__ + ')') + help="Sets the producer metadata value " + "(default is: img2pdf " + __version__ + ")", + ) metaargs.add_argument( - '--creationdate', metavar='creationdate', type=valid_date, - help='Sets the UTC creation date metadata value in YYYY-MM-DD or ' - 'YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format ' - 'understood by python dateutil module or any format understood ' - 'by `date --date`') + "--creationdate", + metavar="creationdate", + type=valid_date, + help="Sets the UTC creation date metadata value in YYYY-MM-DD or " + "YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " + "understood by python dateutil module or any format understood " + "by `date --date`", + ) metaargs.add_argument( - '--moddate', metavar='moddate', type=valid_date, - help='Sets the UTC modification date metadata value in YYYY-MM-DD ' - 'or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format ' - 'understood by python dateutil module or any format understood ' - 'by `date --date`') + "--moddate", + metavar="moddate", + type=valid_date, + help="Sets the UTC modification date metadata value in YYYY-MM-DD " + "or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " + "understood by python dateutil module or any format understood " + "by `date --date`", + ) metaargs.add_argument( - '--subject', metavar='subject', type=str, - help='Sets the subject metadata value') + "--subject", metavar="subject", type=str, help="Sets the subject metadata value" + ) metaargs.add_argument( - '--keywords', metavar='kw', type=str, nargs='+', - help='Sets the keywords metadata value (can be given multiple times)') + "--keywords", + metavar="kw", + type=str, + nargs="+", + help="Sets the keywords metadata value (can be given multiple times)", + ) viewerargs = parser.add_argument_group( - title='PDF viewer arguments', - description='PDF files can specify how they are meant to be ' - 'presented to the user by a PDF viewer') + 
title="PDF viewer arguments", + description="PDF files can specify how they are meant to be " + "presented to the user by a PDF viewer", + ) viewerargs.add_argument( - '--viewer-panes', metavar="PANES", type=parse_panes, - help='Instruct the PDF viewer which side panes to show. Valid values ' - 'are "outlines" and "thumbs". It is not possible to specify both ' - 'at the same time.') + "--viewer-panes", + metavar="PANES", + type=parse_panes, + help="Instruct the PDF viewer which side panes to show. Valid values " + 'are "outlines" and "thumbs". It is not possible to specify both ' + "at the same time.", + ) viewerargs.add_argument( - '--viewer-initial-page', metavar="NUM", type=int, - help='Instead of showing the first page, instruct the PDF viewer to ' - 'show the given page instead. Page numbers start with 1.') + "--viewer-initial-page", + metavar="NUM", + type=int, + help="Instead of showing the first page, instruct the PDF viewer to " + "show the given page instead. Page numbers start with 1.", + ) viewerargs.add_argument( - '--viewer-magnification', metavar="MAG", type=parse_magnification, - help='Instruct the PDF viewer to open the PDF with a certain zoom ' - 'level. Valid values are either a floating point number giving ' - 'the exact zoom level, "fit" (zoom to fit whole page), "fith" ' - '(zoom to fit page width) and "fitbh" (zoom to fit visible page ' - 'width).') + "--viewer-magnification", + metavar="MAG", + type=parse_magnification, + help="Instruct the PDF viewer to open the PDF with a certain zoom " + "level. Valid values are either a floating point number giving " + 'the exact zoom level, "fit" (zoom to fit whole page), "fith" ' + '(zoom to fit page width) and "fitbh" (zoom to fit visible page ' + "width).", + ) viewerargs.add_argument( - '--viewer-page-layout', metavar="LAYOUT", type=parse_layout, - help='Instruct the PDF viewer how to arrange the pages on the screen. 
' - 'Valid values are "single" (display single pages), "onecolumn" ' - '(one continuous column), "twocolumnright" (two continuous ' - 'columns with odd number pages on the right) and "twocolumnleft" ' - '(two continuous columns with odd numbered pages on the left)') + "--viewer-page-layout", + metavar="LAYOUT", + type=parse_layout, + help="Instruct the PDF viewer how to arrange the pages on the screen. " + 'Valid values are "single" (display single pages), "onecolumn" ' + '(one continuous column), "twocolumnright" (two continuous ' + 'columns with odd number pages on the right) and "twocolumnleft" ' + "(two continuous columns with odd numbered pages on the left)", + ) viewerargs.add_argument( - '--viewer-fit-window', action="store_true", - help='Instruct the PDF viewer to resize the window to fit the page ' - 'size') + "--viewer-fit-window", + action="store_true", + help="Instruct the PDF viewer to resize the window to fit the page " "size", + ) viewerargs.add_argument( - '--viewer-center-window', action="store_true", - help='Instruct the PDF viewer to center the PDF viewer window') + "--viewer-center-window", + action="store_true", + help="Instruct the PDF viewer to center the PDF viewer window", + ) viewerargs.add_argument( - '--viewer-fullscreen', action="store_true", - help='Instruct the PDF viewer to open the PDF in fullscreen mode') + "--viewer-fullscreen", + action="store_true", + help="Instruct the PDF viewer to open the PDF in fullscreen mode", + ) - args = parser.parse_args() + args = parser.parse_args(argv[1:]) if args.verbose: logging.basicConfig(level=logging.DEBUG) @@ -1990,15 +2625,19 @@ values set via the --border option. 
if args.pillow_limit_break: Image.MAX_IMAGE_PIXELS = None - layout_fun = get_layout_fun(args.pagesize, args.imgsize, args.border, - args.fit, args.auto_orient) + layout_fun = get_layout_fun( + args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient + ) # if no positional arguments were supplied, read a single image from # standard input if len(args.images) == 0: logging.info("reading image from standard input") try: - args.images = [sys.stdin.buffer.read()] + if PY3: + args.images = [sys.stdin.buffer.read()] + else: + args.images = [sys.stdin.read()] except KeyboardInterrupt: exit(0) @@ -2007,39 +2646,52 @@ values set via the --border option. if args.viewer_initial_page is not None: if args.viewer_initial_page < 1: parser.print_usage(file=sys.stderr) - logging.error("%s: error: argument --viewer-initial-page: must be " - "greater than zero" % parser.prog) + logging.error( + "%s: error: argument --viewer-initial-page: must be " + "greater than zero" % parser.prog + ) exit(2) if args.viewer_initial_page > len(args.images): parser.print_usage(file=sys.stderr) - logging.error("%s: error: argument --viewer-initial-page: must be " - "less than or equal to the total number of pages" % - parser.prog) + logging.error( + "%s: error: argument --viewer-initial-page: must be " + "less than or equal to the total number of pages" % parser.prog + ) exit(2) try: convert( - *args.images, title=args.title, author=args.author, - creator=args.creator, producer=args.producer, - creationdate=args.creationdate, moddate=args.moddate, - subject=args.subject, keywords=args.keywords, - colorspace=args.colorspace, nodate=args.nodate, - layout_fun=layout_fun, viewer_panes=args.viewer_panes, + *args.images, + title=args.title, + author=args.author, + creator=args.creator, + producer=args.producer, + creationdate=args.creationdate, + moddate=args.moddate, + subject=args.subject, + keywords=args.keywords, + colorspace=args.colorspace, + nodate=args.nodate, + layout_fun=layout_fun, + 
viewer_panes=args.viewer_panes, viewer_initial_page=args.viewer_initial_page, viewer_magnification=args.viewer_magnification, viewer_page_layout=args.viewer_page_layout, viewer_fit_window=args.viewer_fit_window, viewer_center_window=args.viewer_center_window, - viewer_fullscreen=args.viewer_fullscreen, with_pdfrw=not - args.without_pdfrw, outputstream=args.output, - first_frame_only=args.first_frame_only) + viewer_fullscreen=args.viewer_fullscreen, + with_pdfrw=not args.without_pdfrw, + outputstream=args.output, + first_frame_only=args.first_frame_only + ) except Exception as e: logging.error("error: " + str(e)) if logging.getLogger().isEnabledFor(logging.DEBUG): import traceback + traceback.print_exc(file=sys.stderr) exit(1) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/src/tests/__init__.py b/src/tests/__init__.py index c9b85e3..807aa84 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -6,7 +6,7 @@ import struct import sys import zlib from PIL import Image -from io import StringIO, BytesIO +from io import StringIO, BytesIO, TextIOWrapper HERE = os.path.dirname(__file__) @@ -17,6 +17,51 @@ if PY3: else: PdfReaderIO = BytesIO +# Recompressing the image stream makes the comparison robust against output +# preserving changes in the zlib compress output bitstream +# (e.g. between different zlib implementations/versions/releases). +# Without this, some img2pdf 0.3.2 tests fail on Fedora 29/aarch64. 
+# See also: +# https://gitlab.mister-muffin.de/josch/img2pdf/issues/51 +# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/ +def recompress_last_stream(bs): + length_pos = bs.rindex(b'/Length') + li = length_pos + 8 + lj = bs.index(b' ', li) + n = int(bs[li:lj]) + stream_pos = bs.index(b'\nstream\n', lj) + si = stream_pos + 8 + sj = si + n + startx_pos = bs.rindex(b'\nstartxref\n') + xi = startx_pos + 11 + xj = bs.index(b'\n', xi) + m = int(bs[xi:xj]) + + unc_t = zlib.decompress(bs[si:sj]) + t = zlib.compress(unc_t) + + new_len = str(len(t)).encode('ascii') + u = (lj-li) + n + v = len(new_len) + len(t) + off = v - u + + rs = (bs[:li] + new_len + bs[lj:si] + t + bs[sj:xi] + + str(m+off).encode('ascii') + bs[xj:]) + + return rs + +def compare_pdf(outx, outy): + if b'/FlateDecode' in outx: + x = recompress_last_stream(outx) + y = recompress_last_stream(outy) + if x != y: + print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr) + print('recompressed outx:\n{}\nouty:\n{}\n'.format(x, y), file=sys.stderr) + return False + elif outx != outy: + print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr) + return False # a mismatch without /FlateDecode must fail the comparison too + return True # convert +set date:create +set date:modify -define png:exclude-chunk=time @@ -433,6 +478,33 @@ def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4): ) +class CommandLineTests(unittest.TestCase): + def test_main_help(self): + if PY3: + from contextlib import redirect_stdout + f = StringIO() + with redirect_stdout(f): + try: + img2pdf.main(['img2pdf', '--help']) + except SystemExit: + pass + res = f.getvalue() + self.assertIn('img2pdf', res) + else: + # silence output + sys_stdout = sys.stdout + sys.stdout = BytesIO() + + try: + img2pdf.main(['img2pdf', '--help']) + except SystemExit: + # argparse does sys.exit(0) on --help + res = sys.stdout.getvalue() + self.assertIn('img2pdf', res) + finally: + sys.stdout = sys_stdout + + +def
test_suite(): class TestImg2Pdf(unittest.TestCase): pass @@ -642,7 +714,7 @@ def test_suite(): ywriter.trailer = y xwriter.write(outx) ywriter.write(outy) - self.assertEqual(outx.getvalue(), outy.getvalue()) + self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True) # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the # close() method try: @@ -656,4 +728,5 @@ def test_suite(): return unittest.TestSuite(( unittest.makeSuite(TestImg2Pdf), + unittest.makeSuite(CommandLineTests), )) diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..5b34a30 --- /dev/null +++ b/test.sh @@ -0,0 +1,1468 @@ +#!/bin/sh + +set -eu + +similar() +{ + psnr=$(compare -metric PSNR "$1" "$2" null: 2>&1 || true) + if [ -z "$psnr" ]; then + echo "compare failed" + return 1 + fi + + # PSNR of zero means that they are identical + if [ "$psnr" = 0 ]; then + echo "images are equal -- don't use similar() but require exactness" + exit 2 + fi + + # The lower PSNR value, the fewer the similarities + # The lowest (and worst) value is 1.0 + min_psnr=50 + if [ "$min_psnr" != "$( printf '%s\n%s\n' "$psnr" "$min_psnr" | sort --general-numeric-sort | head --lines=1)" ]; then + echo "pdf wrongly rendered" + return 1 + fi + return 0 +} + +compare_rendered() +{ + pdf="$1" + img="$2" + gsdevice=png16m + if [ "$#" -eq 3 ]; then + gsdevice="$3" + fi + + compare_ghostscript "$pdf" "$img" "$gsdevice" + + compare_poppler "$pdf" "$img" + + compare_mupdf "$pdf" "$img" +} + +compare_ghostscript() +{ + pdf="$1" + img="$2" + gsdevice="$3" + gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE="$gsdevice" -r96 -sOutputFile="$tempdir/gs-%00d.png" "$pdf" + compare -metric AE "$img" "$tempdir/gs-1.png" null: 2>/dev/null + rm "$tempdir/gs-1.png" +} + +compare_poppler() +{ + pdf="$1" + img="$2" + pdftocairo -r 96 -png "$pdf" "$tempdir/poppler" + compare -metric AE "$img" "$tempdir/poppler-1.png" null: 2>/dev/null + rm "$tempdir/poppler-1.png" +} + +compare_mupdf() +{ + pdf="$1" + img="$2" + mutool draw -o
"$tempdir/mupdf.png" -r 96 "$pdf" 2>/dev/null + compare -metric AE "$img" "$tempdir/mupdf.png" null: 2>/dev/null + rm "$tempdir/mupdf.png" +} + +compare_pdfimages() +{ + pdf="$1" + img="$2" + pdfimages -png "$pdf" "$tempdir/images" + compare -metric AE "$img" "$tempdir/images-000.png" null: 2>/dev/null + rm "$tempdir/images-000.png" +} + +error() +{ + echo "test ${j:-(setup)} failed" # j is still unset while the fixtures are being generated + echo "intermediate data is left in $tempdir" + exit 1 +} + +tempdir=$(mktemp --directory --tmpdir img2pdf.XXXXXXXXXX) + +trap error EXIT + +# we use -strip to remove all timestamps (tIME chunk and exif data) +convert -size 60x60 \( xc:none -fill red -draw 'circle 30,21 30,3' -gaussian-blur 0x3 \) \ + \( \( xc:none -fill lime -draw 'circle 39,39 36,57' -gaussian-blur 0x3 \) \ + \( xc:none -fill blue -draw 'circle 21,39 24,57' -gaussian-blur 0x3 \) \ + -compose plus -composite \ + \) -compose plus -composite \ + -strip \ + "$tempdir/alpha.png" + +convert "$tempdir/alpha.png" -background black -alpha remove -alpha off -strip "$tempdir/normal16.png" + +convert "$tempdir/normal16.png" -depth 8 -strip "$tempdir/normal.png" + +convert "$tempdir/normal.png" -negate -strip "$tempdir/inverse.png" + +convert "$tempdir/normal16.png" -colorspace Gray -depth 16 -strip "$tempdir/gray16.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 256 -depth 8 -strip "$tempdir/gray8.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 16 -depth 4 -strip "$tempdir/gray4.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 4 -depth 2 -strip "$tempdir/gray2.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 2 -depth 1 -strip "$tempdir/gray1.png" + +# use "-define png:exclude-chunk=bkgd" because otherwise, imagemagick will +# add the background color (white) as an additional entry to the palette +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 2 -define png:exclude-chunk=bkgd -strip
"$tempdir/palette1.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 4 -define png:exclude-chunk=bkgd -strip "$tempdir/palette2.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 16 -define png:exclude-chunk=bkgd -strip "$tempdir/palette4.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 256 -define png:exclude-chunk=bkgd -strip "$tempdir/palette8.png" + +cat << END | ( cd "$tempdir"; md5sum --check --status - ) +a99ef2a356c315090b6939fa4ce70516 alpha.png +0df21ebbce5292654119b17f6e52bc81 gray16.png +6faee81b8db446caa5004ad71bddcb5b gray1.png +97e423da517ede069348484a1283aa6c gray2.png +cbed1b6da5183aec0b86909e82b77c41 gray4.png +c0df42fdd69ae2a16ad0c23adb39895e gray8.png +ac6bb850fb5aaee9fa7dcb67525cd0fc inverse.png +3f3f8579f5054270e79a39e7cc4e89e0 normal16.png +cbe63b21443af8321b213bde6666951f normal.png +2f00705cca05fd94406fc39ede4d7322 palette1.png +6cb250d1915c2af99c324c43ff8286eb palette2.png +ab7b3d3907a851692ee36f5349ed0b2c palette4.png +03829af4af8776adf56ba2e68f5b111e palette8.png +END + +# use img2pdfprog environment variable if it is set +if [ -z ${img2pdfprog+x} ]; then + img2pdfprog=src/img2pdf.py +fi + +img2pdf() +{ + # we use --without-pdfrw to better "grep" the result and because we + # cannot write palette based images otherwise + $img2pdfprog --without-pdfrw --producer="" --nodate "$1" > "$2" 2>/dev/null +} + +tests=51 # number of tests +j=1 # current test + +############################################################################### +echo "Test $j/$tests JPEG" + +convert "$tempdir/normal.png" "$tempdir/normal.jpg" + +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Format: JPEG (Joint Photographic Experts Group JFIF format)$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Mime type: image/jpeg$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose 
"$tempdir/normal.jpg" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Compression: JPEG$' + +img2pdf "$tempdir/normal.jpg" "$tempdir/out.pdf" + +# We have to use jpegtopnm with the original JPG before being able to compare +# it with imagemagick because imagemagick will decode the JPG slightly +# differently than ghostscript, poppler and mupdf do it. +# We have to use jpegtopnm and cannot use djpeg because the latter produces +# slightly different results as well when called like this: +# djpeg -dct int -pnm "$tempdir/normal.jpg" > "$tempdir/normal.pnm" +# An alternative way to compare the JPG would be to require a different DCT +# method when decoding by setting -define jpeg:dct-method=ifast in the +# compare command. +jpegtopnm -dct int "$tempdir/normal.jpg" > "$tempdir/normal.pnm" 2>/dev/null + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.pnm" + +pdfimages -j "$tempdir/out.pdf" "$tempdir/images" +cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg" +rm "$tempdir/images-000.jpg" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.jpg" "$tempdir/normal.pnm" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests JPEG (90° rotated)" + +convert "$tempdir/normal.png" "$tempdir/normal.jpg" +exiftool -overwrite_original -all= "$tempdir/normal.jpg" -n >/dev/null +exiftool -overwrite_original -Orientation=6 -XResolution=96 -YResolution=96 -n "$tempdir/normal.jpg" 
>/dev/null + +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Format: JPEG (Joint Photographic Experts Group JFIF format)$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Mime type: image/jpeg$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Compression: JPEG$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:Orientation: 6$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:ResolutionUnit: 2$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:XResolution: 96/1$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:YResolution: 96/1$' + +img2pdf "$tempdir/normal.jpg" "$tempdir/out.pdf" + +# We have to use jpegtopnm with the original JPG before being able to compare +# it with imagemagick because imagemagick will decode the JPG slightly +# differently than ghostscript, poppler and mupdf do it. +# We have to use jpegtopnm and cannot use djpeg because the latter produces +# slightly different results as well when called like this: +# djpeg -dct int -pnm "$tempdir/normal.jpg" > "$tempdir/normal.pnm" +# An alternative way to compare the JPG would be to require a different DCT +# method when decoding by setting -define jpeg:dct-method=ifast in the +# compare command. 
+jpegtopnm -dct int "$tempdir/normal.jpg" > "$tempdir/normal.pnm" 2>/dev/null +convert -rotate "90" "$tempdir/normal.pnm" "$tempdir/normal_rotated.png" +#convert -rotate "0" "$tempdir/normal.pnm" "$tempdir/normal_rotated.png" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal_rotated.png" + +pdfimages -j "$tempdir/out.pdf" "$tempdir/images" +cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg" +rm "$tempdir/images-000.jpg" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" +grep --quiet '^ /Rotate 90$' "$tempdir/out.pdf" + +rm "$tempdir/normal.jpg" "$tempdir/normal.pnm" "$tempdir/out.pdf" "$tempdir/normal_rotated.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests JPEG CMYK" + +convert "$tempdir/normal.png" -colorspace cmyk "$tempdir/normal.jpg" + +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Format: JPEG (Joint Photographic Experts Group JFIF format)$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Mime type: image/jpeg$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Colorspace: CMYK$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Type: ColorSeparation$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Compression: JPEG$' + +img2pdf "$tempdir/normal.jpg" "$tempdir/out.pdf" + +gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE=tiff32nc -r96 -sOutputFile="$tempdir/gs-%00d.tiff" "$tempdir/out.pdf" +similar
"$tempdir/normal.jpg" "$tempdir/gs-1.tiff" +rm "$tempdir/gs-1.tiff" + +# not testing with poppler as it cannot write CMYK images + +mutool draw -o "$tempdir/mupdf.pam" -r 96 -c cmyk "$tempdir/out.pdf" 2>/dev/null # explicit path: do not rely on a variable leaked by the compare_* helpers +similar "$tempdir/normal.jpg" "$tempdir/mupdf.pam" +rm "$tempdir/mupdf.pam" + +pdfimages -j "$tempdir/out.pdf" "$tempdir/images" +cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg" +rm "$tempdir/images-000.jpg" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceCMYK$' "$tempdir/out.pdf" +grep --quiet '^ /Decode \[ 1 0 1 0 1 0 1 0 \]$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.jpg" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests JPEG2000" + +convert "$tempdir/normal.png" "$tempdir/normal.jp2" + +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Format: JP2 (JPEG-2000 File Format Syntax)$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Mime type: image/jp2$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Compression: JPEG2000$' + +img2pdf "$tempdir/normal.jp2" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.jp2" + +pdfimages -jp2 "$tempdir/out.pdf" "$tempdir/images" +cmp "$tempdir/normal.jp2" "$tempdir/images-000.jp2" +rm "$tempdir/images-000.jp2" + +grep --quiet
'^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /JPXDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.jp2" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +#echo Test JPEG2000 CMYK +# +# cannot test because imagemagick does not support JPEG2000 CMYK + +############################################################################### +echo "Test $j/$tests PNG RGB8" + +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$' +identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/normal.png" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.png" + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.png" + +grep --quiet 
'^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG RGB16" + +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$' +identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/normal16.png" "$tempdir/out.pdf" + +compare_ghostscript "$tempdir/out.pdf" "$tempdir/normal16.png" tiff48nc + +# poppler outputs 8-bit RGB so the comparison will not be exact 
+pdftocairo -r 96 -png "$tempdir/out.pdf" "$tempdir/poppler" +similar "$tempdir/normal16.png" "$tempdir/poppler-1.png" +rm "$tempdir/poppler-1.png" + +# pdfimages is unable to write 16 bit output + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG RGBA8" + +convert "$tempdir/alpha.png" -depth 8 -strip "$tempdir/alpha8.png" + +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.color-type-orig: 6$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.color_type: 6 (RGBA)$' +identify -verbose "$tempdir/alpha8.png" | grep 
--quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha8.png" /dev/null && rc=$? || rc=$? +if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha8.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG RGBA16" + +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.color-type-orig: 6$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.color_type: 6 (RGBA)$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG Gray8 Alpha" + +convert "$tempdir/alpha.png" -colorspace Gray -dither FloydSteinberg -colors 256 -depth 8 -strip "$tempdir/alpha_gray8.png" + +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Type: GrayscaleAlpha$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.color-type-orig: 4$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.color_type: 4 (GrayAlpha)$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha_gray8.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha_gray8.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG Gray16 Alpha" + +convert "$tempdir/alpha.png" -colorspace Gray -depth 16 -strip "$tempdir/alpha_gray16.png" + +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Type: GrayscaleAlpha$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.color-type-orig: 4$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.color_type: 4 (GrayAlpha)$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha_gray16.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha_gray16.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG interlaced" + +convert "$tempdir/normal.png" -interlace PNG -strip "$tempdir/interlace.png" + +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.interlace_method: 1 (Adam7 method)$' + +img2pdf "$tempdir/interlace.png" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.png" + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' 
"$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/interlace.png" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests PNG Gray$i" + + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Mime type: image/png$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Colorspace: Gray$' + if [ "$i" -eq 1 ]; then + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Type: Bilevel$' + else + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Type: Grayscale$' + fi + if [ "$i" -eq 8 ]; then + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ Depth: 8-bit$" + else + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ Depth: 8/$i-bit$" + fi + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ png:IHDR.bit-depth-orig: $i$" + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ png:IHDR.bit_depth: $i$" + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.color-type-orig: 0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.color_type: 0 (Grayscale)$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + + img2pdf "$tempdir/gray$i.png" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/gray$i.png" pnggray + + compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray$i.png" + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' 
"$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests PNG Gray16" + +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Type: Grayscale$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.color-type-orig: 0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.color_type: 0 (Grayscale)$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/gray16.png" "$tempdir/out.pdf" + +# ghostscript outputs 8-bit grayscale, so the comparison will not be exact +gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE=pnggray -r96 -sOutputFile="$tempdir/gs-%00d.png" "$tempdir/out.pdf" 
+similar "$tempdir/gray16.png" "$tempdir/gs-1.png" +rm "$tempdir/gs-1.png" + +# poppler outputs 8-bit grayscale so the comparison will not be exact +pdftocairo -r 96 -png "$tempdir/out.pdf" "$tempdir/poppler" +similar "$tempdir/gray16.png" "$tempdir/poppler-1.png" +rm "$tempdir/poppler-1.png" + +# pdfimages outputs 8-bit grayscale so the comparison will not be exact +pdfimages -png "$tempdir/out.pdf" "$tempdir/images" +similar "$tempdir/gray16.png" "$tempdir/images-000.png" +rm "$tempdir/images-000.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests PNG Palette$i" + + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Mime type: image/png$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Depth: 8-bit$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet "^ png:IHDR.bit-depth-orig: $i$" + identify 
-verbose "$tempdir/palette$i.png" | grep --quiet "^ png:IHDR.bit_depth: $i$" + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.color-type-orig: 3$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.color_type: 3 (Indexed)$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + + img2pdf "$tempdir/palette$i.png" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests GIF transparent" + +convert "$tempdir/alpha.png" "$tempdir/alpha.gif" + +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Type: PaletteAlpha$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify 
-verbose "$tempdir/alpha.gif" | grep --quiet '^ Compression: LZW$' + +img2pdf "$tempdir/alpha.gif" /dev/null && rc=$? || rc=$? +if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha.gif" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests GIF Palette$i" + + convert "$tempdir/palette$i.png" "$tempdir/palette$i.gif" + + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Mime type: image/gif$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Depth: 8-bit$' + case $i in + 1) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 2$';; + 2) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 4$';; + 4) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 16$';; + 8) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 256$';; + esac + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Compression: LZW$' + + img2pdf "$tempdir/palette$i.gif" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 
1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" "$tempdir/palette$i.gif" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests GIF animation" + +convert "$tempdir/normal.png" "$tempdir/inverse.png" -strip "$tempdir/animation.gif" + +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Type: Palette$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Compression: LZW$' + +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Type: Palette$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/animation.gif[1]" | 
grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Compression: LZW$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Scene: 1$' + +img2pdf "$tempdir/animation.gif" "$tempdir/out.pdf" + +if [ "$(pdfinfo "$tempdir/out.pdf" | awk '/Pages:/ {print $2}')" != 2 ]; then + echo "pdf does not have 2 pages" + exit 1 +fi + +pdfseparate "$tempdir/out.pdf" "$tempdir/page-%d.pdf" +rm "$tempdir/out.pdf" + +for page in 1 2; do + compare_rendered "$tempdir/page-$page.pdf" "$tempdir/animation.gif[$((page-1))]" + + # pdfimages cannot export palette based images + + # We cannot grep the PDF metadata here, because the page was + # rewritten into a non-greppable format by pdfseparate. but that's + # okay, because we already grepped single pages before and multipage + # PDF should not be different. + + rm "$tempdir/page-$page.pdf" +done + +rm "$tempdir/animation.gif" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF float" + +convert "$tempdir/normal.png" -depth 32 -define quantum:format=floating-point "$tempdir/float.tiff" + +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Depth: 32/8-bit$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ quantum:format: floating-point$' 
+identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:alpha: unspecified$'
+identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:endian: lsb$'
+identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:photometric: RGB$'
+
+img2pdf "$tempdir/float.tiff" /dev/null && rc=$? || rc=$?
+if [ "$rc" -eq 0 ]; then
+    echo needs to fail here
+    exit 1
+fi
+
+rm "$tempdir/float.tiff"
+j=$((j+1))
+
+###############################################################################
+echo "Test $j/$tests TIFF CMYK8"
+
+convert "$tempdir/normal.png" -colorspace cmyk "$tempdir/cmyk8.tiff"
+
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Mime type: image/tiff$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Colorspace: CMYK$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Type: ColorSeparation$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Endianess: LSB$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Depth: 8-bit$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Compression: Zip$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:alpha: unspecified$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:endian: lsb$'
+identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:photometric: separated$'
+
+img2pdf "$tempdir/cmyk8.tiff" "$tempdir/out.pdf"
+
+compare_ghostscript "$tempdir/out.pdf" "$tempdir/cmyk8.tiff" tiff32nc
+
+# not testing with poppler as it cannot write CMYK images
+# render the PDF written above with mupdf into a CMYK pam and compare it; the path is spelled out because no $pdf is assigned in this section
+mutool draw -o "$tempdir/mupdf.pam" -r 96 -c cmyk "$tempdir/out.pdf" 2>/dev/null
+compare -metric AE "$tempdir/cmyk8.tiff" "$tempdir/mupdf.pam" null: 2>/dev/null
+rm "$tempdir/mupdf.pam"
+
+pdfimages -tiff "$tempdir/out.pdf" "$tempdir/images"
+compare -metric AE "$tempdir/cmyk8.tiff" "$tempdir/images-000.tif" null: 2>/dev/null
+rm "$tempdir/images-000.tif"
+
+grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /ColorSpace /DeviceCMYK$' "$tempdir/out.pdf"
+grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf"
+grep --quiet '^ /Height 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Width 60$' "$tempdir/out.pdf"
+
+rm "$tempdir/cmyk8.tiff" "$tempdir/out.pdf"
+j=$((j+1))
+
+###############################################################################
+echo "Test $j/$tests TIFF CMYK16"
+
+convert "$tempdir/normal.png" -depth 16 -colorspace cmyk "$tempdir/cmyk16.tiff"
+
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Mime type: image/tiff$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Colorspace: CMYK$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Type: ColorSeparation$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Endianess: LSB$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Depth: 16-bit$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Compression: Zip$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:alpha: unspecified$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:endian: lsb$'
+identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:photometric: separated$'
+
+# PIL is unable to read 16 bit CMYK images, so converting the TIFF created above (not a nonexistent file) must fail
+img2pdf "$tempdir/cmyk16.tiff" /dev/null && rc=$? || rc=$?
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/cmyk16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGB8" + +convert "$tempdir/normal.png" "$tempdir/normal.tiff" + +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/normal.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.tiff" tiff24nc + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.tiff" 
"$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGBA8" + +convert "$tempdir/alpha.png" -depth 8 -strip "$tempdir/alpha8.tiff" + +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:alpha: unassociated$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/alpha8.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha8.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGBA16" + +convert "$tempdir/alpha.png" -strip "$tempdir/alpha16.tiff" + +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:alpha: unassociated$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/alpha16.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF Gray1" + +convert "$tempdir/gray1.png" -depth 1 "$tempdir/gray1.tiff" + +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Type: Bilevel$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Depth: 1-bit$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + +img2pdf "$tempdir/gray1.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/gray1.png" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray1.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 true$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' 
"$tempdir/out.pdf" + +rm "$tempdir/gray1.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 2 4 8; do + echo "Test $j/$tests TIFF Gray$i" + + convert "$tempdir/gray$i.png" -depth $i "$tempdir/gray$i.tiff" + + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Colorspace: Gray$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Type: Grayscale$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Endianess: LSB$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet "^ Depth: $i-bit$" + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + + img2pdf "$tempdir/gray$i.tiff" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/gray$i.png" pnggray + + compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray$i.png" + + # When saving a PNG, PIL will store it as 8-bit data + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' 
"$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/gray$i.tiff" "$tempdir/out.pdf" + j=$((j+1)) +done + +################################################################################ +echo "Test $j/$tests TIFF Gray16" + +convert "$tempdir/gray16.png" -depth 16 "$tempdir/gray16.tiff" + +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Type: Grayscale$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet "^ Depth: 16-bit$" +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + +img2pdf "$tempdir/gray16.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/gray16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF multipage" + +convert "$tempdir/normal.png" "$tempdir/inverse.png" -strip "$tempdir/multipage.tiff" + +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:photometric: RGB$' + +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Depth: 
8-bit$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:photometric: RGB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Scene: 1$' + +img2pdf "$tempdir/multipage.tiff" "$tempdir/out.pdf" + +if [ "$(pdfinfo "$tempdir/out.pdf" | awk '/Pages:/ {print $2}')" != 2 ]; then + echo "pdf does not have 2 pages" + exit 1 +fi + +pdfseparate "$tempdir/out.pdf" "$tempdir/page-%d.pdf" +rm "$tempdir/out.pdf" + +for page in 1 2; do + compare_rendered "$tempdir/page-$page.pdf" "$tempdir/multipage.tiff[$((page-1))]" + + compare_pdfimages "$tempdir/page-$page.pdf" "$tempdir/multipage.tiff[$((page-1))]" + + # We cannot grep the PDF metadata here, because the page was + # rewritten into a non-greppable format by pdfseparate. but that's + # okay, because we already grepped single pages before and multipage + # PDF should not be different. 
+ + rm "$tempdir/page-$page.pdf" +done + +rm "$tempdir/multipage.tiff" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests TIFF Palette$i" + + convert "$tempdir/palette$i.png" "$tempdir/palette$i.tiff" + + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Endianess: LSB$' + if [ "$i" -eq 8 ]; then + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet "^ Depth: 8-bit$" + else + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet "^ Depth: $i/8-bit$" + fi + case $i in + 1) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 2$';; + 2) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 4$';; + 4) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 16$';; + 8) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 256$';; + esac + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:photometric: palette$' + + img2pdf "$tempdir/palette$i.tiff" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export 
palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + + rm "$tempdir/palette$i.tiff" + j=$((j+1)) +done + +############################################################################### +for i in 12 14 16; do + echo "Test $j/$tests TIFF RGB$i" + + convert "$tempdir/normal16.png" -depth "$i" "$tempdir/normal$i.tiff" + + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Type: TrueColor$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Endianess: LSB$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet "^ Depth: $i-bit$" + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:photometric: RGB$' + + img2pdf "$tempdir/normal$i.tiff" /dev/null && rc=$? || rc=$? 
+ if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 + fi + + rm "$tempdir/normal$i.tiff" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, little endian, msb-to-lsb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' 
"$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, big endian, msb-to-lsb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=msb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: MSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: msb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep 
--quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, big endian, lsb-to-msb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=msb -define tiff:fill-order=lsb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: lsb-to-msb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: MSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: msb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep 
--quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, little endian, msb-to-lsb, min-is-black" + +# We create a min-is-black group4 tiff with PIL because it creates these by +# default (and without the option to do otherwise) whereas imagemagick only +# became able to do it through commit 00730551f0a34328685c59d0dde87dd9e366103a +# See https://www.imagemagick.org/discourse-server/viewtopic.php?f=1&t=34605 +python3 -c 'from PIL import Image;Image.open("'"$tempdir/gray1.png"'").save("'"$tempdir/group4.tiff"'",format="TIFF",compression="group4")' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-black' +# PIL doesn't set those +#tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +#tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-black' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" 
"$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 true$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, without fillorder, samples/pixel, bits/sample" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +# remove BitsPerSample (258) +tiffset -u 258 "$tempdir/group4.tiff" +# remove FillOrder (266) +tiffset -u 266 "$tempdir/group4.tiff" +# remove SamplesPerPixel (277) +tiffset -u 277 "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' && exit 1 +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' && exit 1 +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' && exit 1 +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | 
grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, without rows-per-strip" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white -define tiff:rows-per-strip=4294967295 "$tempdir/group4.tiff" +# remove RowsPerStrip (278) +tiffset -u 278 "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Rows/Strip:' && exit 1 +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 
'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +rm "$tempdir/alpha.png" "$tempdir/normal.png" "$tempdir/inverse.png" "$tempdir/palette1.png" "$tempdir/palette2.png" "$tempdir/palette4.png" "$tempdir/palette8.png" "$tempdir/gray8.png" "$tempdir/normal16.png" "$tempdir/gray16.png" "$tempdir/gray4.png" "$tempdir/gray2.png" "$tempdir/gray1.png" +rmdir "$tempdir" + +trap - EXIT -- cgit v1.2.3 From d2db5413d9d0d63b9b14fc2037c989117804da22 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 10:58:51 +0100 Subject: debian/changelog: new entry --- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index de18c17..1c8e09e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +img2pdf (0.3.3-1) UNRELEASED; urgency=medium + + * new upstream release + + -- Johannes 'josch' Schauer Mon, 07 Jan 2019 10:57:46 +0100 + 
img2pdf (0.3.2-1) unstable; urgency=medium * new upstream release -- cgit v1.2.3 From 22540181f127cd410e2b9c22a6f1bc05b8ba6d4a Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 11:02:53 +0100 Subject: debian/control: run wrap-and-sort --- debian/control | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index 27c4c6d..9a37e63 100644 --- a/debian/control +++ b/debian/control @@ -2,7 +2,13 @@ Source: img2pdf Maintainer: Johannes 'josch' Schauer Section: python Priority: optional -Build-Depends: dh-python, python3-setuptools, python3-all, debhelper (>= 11), python3-pil, python3-pdfrw, help2man +Build-Depends: debhelper (>= 11), + dh-python, + help2man, + python3-all, + python3-pdfrw, + python3-pil, + python3-setuptools Standards-Version: 3.9.8 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf @@ -11,7 +17,10 @@ Homepage: https://gitlab.mister-muffin.de/josch/img2pdf Package: img2pdf Architecture: all Section: utils -Depends: ${misc:Depends}, ${python3:Depends}, python3-img2pdf (= ${binary:Version}), python3-pkg-resources +Depends: python3-img2pdf (= ${binary:Version}), + python3-pkg-resources, + ${misc:Depends}, + ${python3:Depends} Description: Lossless conversion of raster images to PDF This program will take a list of raster images and produce a PDF file with the images embedded in it. 
JPEG and JPEG2000 images will be included without -- cgit v1.2.3 From 3c6d69a6870e7d47816e09261dd7849a3ae93d7a Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 11:22:12 +0100 Subject: debian/upstream/signing-key.asc: export a more minimal key with --export-options export-minimal,export-clean --- debian/upstream/signing-key.asc | 252 ++++++++++++++-------------------------- 1 file changed, 84 insertions(+), 168 deletions(-) diff --git a/debian/upstream/signing-key.asc b/debian/upstream/signing-key.asc index 018940f..3bd21f3 100644 --- a/debian/upstream/signing-key.asc +++ b/debian/upstream/signing-key.asc @@ -23,172 +23,88 @@ uNgxyTcBFBAE/M2HnRCRW+1Tui1KB0gKAY09NhxX++6EbaetXqjiHBLnNBcOaSWZ FMjzo/qn7YqRljAlLwHACK03J8yvbcIfFW4uTBpmE2+Y8vR+JaegW2+Xh94OVPeH Q8y8GRkCwkZxBxNIZGjjm9MWuccGHxQtzU6balfYyAgoC1HvnSw8parqRW6kkl7h TBJVcbJkJ37XRNVtRGPmdg98xrAk2MUZOFMOKBXFQW4e3a3KNRO8zQh7x/tmfG/R -ti3yIjpHLhGrkwaMOUdAz/Fh43ZNcR8mI2PIF7Aj4eA+/B/dyua9DYkCNwQTAQgA -IQUCUdVY/QIbAwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRB9XYxgz00+tAWv -D/0Xv1rHwFJuzEQ02HUqGhHSqZVaDIrq/vxLMPKaGcUwrtGvgV79ijprUA6RlSiv -FSrcPglloPUdtR+csZ25oMYBSRDVd1B5+qqwyjYkgDwNA4/Cu+QlHevPCR8fGXnQ -kg97lQ6YKNdl1K1qm6rZzwUWobvVQK8hUF97orJzhgVU7/AA1B0lbVQyWJ7Odm+H -uBsijwCFpTDfVX3xCEdtf9AWMVD13D2vaqhmoobzyciE5cJHmVQC6CSfYs31gkIn -mVvNC1SN8/qvSxY/GvHCBV1suYhJn5Ls9LhaXR09m82sJsVRL5qEvMscBcpcDAdK -VQehl5i19mvDSNG9btwZ2E80mH57qXAW2K+h9n+5UhZk80hAsI25de+jy/Kp20L1 -nzMH/j5mEpHKMMWb1k5pZBho9Bskjbg/R9TQzfqUbAJUX/IvsruB+z/KD/CT4X2i -BFVjbnPk3E8T399rxKEfaqBqLAATjbP/VQMr3eZqs0roL6roPtVNf80wKjHk+KDA -yMBVD7caeyHrAAxAA4nIeFd9nfP3SN8MnnxEugceaAiLxyCcIfrteyftBddzOUGf -pej8LYYkrlbQuRv4+BXDya55vanBegc3bBWA4Ft0DE9QNnvRN9Tw+vvlRTrvEmqe -G/iKJYAu0jaVwq92JLSIA8gMoaP5B4LsC/4SfpIWipE7J4hGBBARAgAGBQJSFIGc -AAoJEIZFRLbFS9eYSrAAnRJc1hGFTJ0c+fPIsRHEyEYq1gu2AKCd3P6ML1JSrdjA -dnvCJrq19ygweYkCHAQQAQIABgUCUhSBugAKCRAtGqrPJEREQsn4EACLbUk7HVxk -MvXN9oluiIvery6dOMwCZnbjsuNLxQ0TtmPPIDFfvUp07TvNLP+/Uw4/KACHxv34 
-+fyHJF/X6d12t2XzXtBC1ssbHvoK1oB4gnctJxYi6LgQcgxZPFRzpRC9AzvVuoyX -5sAZIq9QsGslZW5XRSS7A+ahfmrE7G87UD9DKNY5GGOWWaYbN5bqW/wDiuzxnzGB -uzY2Tb6Mq1jraZd2GwuaeM0i50hH65oueKVauR9CalOehWa9qV1/XHOvkC5ep7VQ -R1e+9jTMMz1vPazGM90nTmO+aXtgP1M+gSIIJ4K3QWf3f0GQ1jBwNTIk8gpkx+pH -siWdHsSvsexckjZ2TCgXciTeN5jYTcJNHbYKQoiHhmax5Rr/b/xDZdZzhqa/cWDV -1SPBC4zm4hNLp/xlN0sQLZBZoaco/gTfmSMNlnjAz3qbhxEZDckk4vhzngC3sQGX -n3L++9WTkDp8UO03BYHiKYnBkvZ7asQZkYOJW5pCZU2cFHlBeoFwPPDlgPr9LQla -KqZAGCW2hF8Xt3fjILo5NYcGjMimXwBlC+qsTrG5F0eiDzmHJCGp1ySfoB5V3P4E -YhwEptnYJDawI3qHaqPvBKR9cGjyLxIASJ6FSwpMtbhP9AwLhVlbMKBIatZKlTf3 -JpEEqlFmuaPTT2NGcyECiz87NgFnqxQYbIkCHAQQAQIABgUCUg/uewAKCRD7hjJR -qG+eR+oPD/4lsJby9M7xU4YZSuHA4sR8mjMm9ijaeBkf/2WVo0AEGxqA9qC+7bSn -Ly9lZVacJhrxLOducsDjodQwm0bKOX6F+sGSpoF9WmFH+6HUjrxvNLOV5P86BNJL -ucGQTL/v9ISBCRN3YZqMs0TKKbhTsSvOkEzReYvFfoShQH55qmLYVex4y4f3pBTv -fHx8if7g+aZtenXlwOEHUggp7glU3Y9pDwxAvFJcBSB398KRLub1NrWbmQwpvbYZ -ahlPJu8iSVAzCB/VsoEa207/pZin+pyhDSxgWsyB1ZOON+yDQJh9gQxHWS+PpOyc -MLfscU7rA633o40JILIILR35hNvnHRiSYn9FB/DcksmPDTrhtM5ZVFbKl3lwcgs5 -MdkqacdH90ByTMcW4RbjIte35T6ASd5qiRDpGGYoUeFBnVUrk/JtMCShK49V50+j -Ine52N/6/3b7L7sJZoK8GSM/pU+FSa6TrDQM6LNASfokkAfDy6m+OO4UT8eaq2jX -+x2L4IrnufwjG78Qhg2uLxs5/n3Y2nJJptCDPonbg/0t0NqDyJ2+tsFEQU2X3Xt8 -kXyNSrwPb2JPQ7CwxWLkO3jZYzo8u7LAPRqEEKy9ezNTV7+zRCUaDc3JTkccFsB1 -kCl58hebONButUR7uSn1c8PHkqv/1Kh29Bdgvabbr6rsr0BgZphH9IkBHAQQAQIA -BgUCUpcO6AAKCRD51nCAK/by1pIjB/4qaIR55JZ7ehhO0W+Jfu4XX8vVpt+w3TF2 -vbh5w/5BgmyPkOW8elpxDHPJYQM1S2w5TghILvcHG2dDogWFKfCOSBpdpbr4ZrsA -MTZXY3C6AiWR4JJfL/MG76yEedaON2fVjJLSL5zbwzcJ2gczHXkv+TIbqo0LnlJ1 -eTn1jn4omZD8awlUNke/Z8oWYgguqzOgtIgSW3L+Z3pFfjGr2waNO5pzikwf6k3k -R9zvDloYcv9m3g7BkSk9SeP9iNARuD40QEDIkSnA3aA3DrsL1npOY1h3ak5TC0Zw -GHfEw7SrGBy9Zo/mfJAdMPIQvFPqMV8jdZ93Sqh7gtD1RY6h6a1XiQEcBBABCgAG -BQJSlw68AAoJEHR3XKwTWKOZ7wAIALZQo2H1nfMzSS9iCfHtCy98EYG7Jmw1Wiej -2fNcgkGrVGL9onIXHGfIiIG57nnaWKdWX4pzXWkV2ojkVJrTeD/mAg8+iiTnV4Wl -Csx4YumG+SjNRE5J9Q+bj1h1WyEy3nslZX5DHmRAFzaV2s6GN1dhNdBSDDSrh6J4 
-iyYCLfGM+Oqt1dozozBNri93++O9AqzmsTEDEZ+nKkuiQZekraj/Qz6RSZavqXam -V4lIZRSe0R3waB1R68P2gjMHOh5TUscvlVGWPArMD1yr4elTnIhIbMYwPjP2A8Ho -GxbVo8Dsjs/uJ8ZK1Ht5NrUZKj1cGEoPBES/CT03DQaSsml8XK20JUpvaGFubmVz -IFNjaGF1ZXIgPGouc2NoYXVlckBlbWFpbC5kZT6JAjcEEwEIACEFAlHVWuECGwMF -CwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQfV2MYM9NPrQ1ZQ//QahNCnXvTKCy -AZBNqTrUufcmUUPqJi9rcCGy8efHMh9VmPok7YM3LSVNp22wbNmlv6TFU7klIKhJ -8DPgTvzjPpbRXcLlW2Xjt3ky45AymwK6p8ePaLOP07L4Cy86PlWzEWXLqNQtoMie -dvypUPKViK9VKCKTSryHRUwgvepmGTMYi332Hypwgjs+UjqGMvWrczmfAgGTqImU -pPqH6Eflz/Oalq+f5souo1FfL8yiXTww47Ba2YSmTVZAzYX6MeXqZPp8Xo/oGpxf -Bj0ygP9CGMW6Nzn23+3nCAcFbSO1PUs09kxMXsfvmZUxd3+Fo4czDV9e8w9NsRxX -NNhhkm3Tc4WQGDmZ+ITNKoDJGS+1UdWU+MfPah+fKZ87/O1R0+SX49a/vqTCzrHs -G2QG5jI/exFR014GdlUgc2u7NZ03dk3tb/zTZ5ByD4NPevy3SiEUsGTG0f5zf2se -SgqmSFRQhb5T4xFz3wrOQ48Pd6fHNO7cBuDtR8ZxenJrNC6FCLsvOvuKOXqve0bn -3C8Ei5rtpOWm+Hs/PfmSPO8g/u9Cp4sc2AN1ilBZovwpbxmjEt/zevrYdN05AsYo -mAPWScsirLDtloIiCN7xWbqdXWZAxVo43uWgYbRshSYpFrDyY09tN2bsb6ieWdkv -na4sst1XOpMXYk0rNLKk1CygHZpc0smJAhwEEAECAAYFAlIP7nsACgkQ+4YyUahv -nkeJKw/9G4Of0r/chxBKGwBeXZkwCjD7x/Jt0bBna0aQuBUDIUioUmFPN8fXeeqg -xY6ineq5nyPikXpPO4plWWA0KSrGMNFzAyIYfEsbcVjIpEbu9ayJxA3iHsjXB+vo -yyMylGdnsf1gomh7wzwu2CixRWfYPmBS2Z1sCEDTNBOCkKGGHUSLGEKrbDBYhX2f -0lJaFNJ7i4g2uLjbgmDsxikP0k6n6ImPBjHnSp8gRiUJJh7Vnbwf3aLS6lbAUSDz -LErCY3BwZVbR1LkmaIWqKMDaNlsm6CQhg1/FFjoEObwAkHmm6I7zyue54TdayEv5 -89T3sIxLZIN5txemuxrz3MgdiCU0mTrkGJdwxyvT3J2IUkln/lBhl3mU337cXmB6 -4OhiCq5UfwsdsalWvkNcR87aQHBq9mqpqUwdxTX6sCNo1T0WvFYsUdzX7E3DHFn9 -GZeMCW6iFw6tqfxeCG19RHsqwI4vVoS4gaG4yBI8EPi5QX4xsPMfM06Z/vGJZ+TI -xWJkG6fwg+9AS29LP0WACvyXAeVXmNEXT8jPfIoFBRBWKaGf7XJ/Bc1AVEuaxnlA -mcOZvpuMHcOP0Poo08wU4Bihm7+/aDhYcCWW4feMuDS2fRx4uj608mth6K3NsuHr -ifdoQkwVR9rKDcMSz1ehRMD+pgqhOuzhUPdfFUz+w2dSMnsCvo+IRgQQEQIABgUC -UhSBnAAKCRCGRUS2xUvXmBjoAKCllp2kNqwNfTPZ3HOoyBRraqq5mwCcDIM9R7Wk -NaRuTIxoPON+6ri2Lh+JAhwEEAECAAYFAlIUgboACgkQLRqqzyRERELDIhAAgl5W -soqpWds3aaZzYTiFtwkWRBfMrZsQI9dmKXmzzI+e/i5Z508W8Fv+IGM3zt/sQ/5j 
-EBDRmRc0+sBdUHb5FcvxVbQN7kzWa/5S8CNFM4e8RxeLOAJaTwwhrXt342G6msBl -iNoZlBt+8rOADQEm6GEjhjY41UCFPPp+JXf1Klvp9TlNkZou3ZXWOVSNKVlKvkM4 -w5rKbYSkOHbM2eTAHOquqP6+C0x3Y5n96bFcytvaLxpwI7IYqKT4WnX2ePvQRvIQ -nTpHQS97whMboSXbNBRfxRnpt7t8nj0KqEn/BeBgB8BYmYVObIohEGVzW5CbtBdd -fL89ONatxdr0aIZl5vPSnbOzDOb6pLzKfl71a1QUJESlWOTLOZvc3KqCMwUEngt7 -7gkO2rA+md45jXfMvk247QPahRdKC18XWU5nrMllrnhMAi3sFX5MMCMvOCmpX3Rf -CaLgm69ddFZYprpDytip4XsgalABnNuc/ukYaUwC5lwVpf6zKqe/WV1FDrRbqsIx -7tL3M6Iem68yUZSG4AGoWXoT556ClvtIJGvbfUjsIXRVsVc0N30Qf2UbyA5OLGEb -ReI6Ks7+4WpD0EDXMePCokhtSuMF2akAsmOJ5uvJXjJbxqTPpWgMVHbdBdZ9fGvh -wRUs4sOfLSqwrN96xwwt4BYxIu8O9ARBSL/sDYiJARwEEAECAAYFAlKXDugACgkQ -+dZwgCv28tZAiQf5AY/tT1QjRqNOhgZmuh1AdBq4v8+sYBhjc6wx6kFthhe1dmx0 -2DT/kCcF7TKihZ8IJtPurd8WCT6JlaGQERi6k1dk8ytjS3tHdtSaDXruPcEGdDZI -+TCSMPLcXTzhuU8+CvWRmcd2+54vaQk+bdgfkGTSfMvw7C/EGckiYdwfd8dT2GWl -GvNsyy9q4iWVOo1rseoNW0tLz53Y95WetT5TOgEzxZVT1c/9wphfd+WOmVT4AzSS -eaMB+JdYBhA/BQ3hz11PNEUraYEzSJsGruonvmx5LOze/s/ROIdk2dJycFUDt5Q+ -v7FS10rn2YixghWl8DhIgZZ62njV4VYUgbif9IkBHAQQAQoABgUCUpcOvAAKCRB0 -d1ysE1ijmWsnCACQ1m/hvBjyCOeAYGRrIOLWrqWxIHu8wbRMu7FIg2ITzKX9dPv8 -sADSNtgTnS+0D7q1LcbnkedglxMh64FGOHQ0RuQz+Y0ejSE2pLvx5MR5JXiuMrFC -nsNzcjcKR15IWoWdG9qMzDchTQQhdtUT6xR68bCs2sh+op6VAIAjrHiaUM7nDxoS -PVINj5bBx73Gg0XFwcUwo6LDPPju2nlgyabRcVhbtZuUgaGk7gnkk2oGeLEX+nWp -G7ychp/qkrPPyk00NDR2ZfVveTW3/9h9YW/AD8XoMCXnI2vjAfaPEUcpKRRn4twW -1dEqEwY8I/OEFQ1eZN0jFOsg11hkymZ7PhtgtCNKb2hhbm5lcyBTY2hhdWVyIDxq -b3NjaEBkZWJpYW4ub3JnPokCNwQTAQgAIQUCVNIfWgIbAwULCQgHAwUVCgkICwUW -AgMBAAIeAQIXgAAKCRB9XYxgz00+tEEnEACF4XXMwixZFIy2ZlpN7SCvt/mmNzFo -PZoJp5VCDk8CXH5wsXT8BhVsUNXMBdCr6N5+Cd12rDz3tkAvxKwJQhxMu6u87eri -rHWGD6SqFXMXKOLdLj2LeOUHijOYDVUxkoaGrMsM39RghfB++QuanhNXDQozLsWa -auVohhxF7EGzJ/y7evo66is8kdHHE3ZwgxYsS1eE5hKTy07MykoGCPkqLiDo5Fbn -7dNIFMGnCVkC5FvlwaaKJF2Bz9H+BMaUxgmvY4H+z/fyfUryq9xLYkFd83b0wAkN -4NGQ0qIjdd7hLd/tt1y48G6Hjk4VYz+863WOGyrNVHQQVvviiTgz9/eacp5nMyVW -cZ1eh4/PHpng5y9rTb2NoPy62eXro93/o7yWrCmuIO6ucVDj3nNyFayyfREJy/QK 
-aQryqP36u7pM7IBqsEpu70LWEOUgaQFpiUz+NWb575qeiSLydu6VYbhgcfkrKRBb -JbgqOojWTdci0tknNhQm/bEuZiUCj7l/9paHlo26isY4ZoqDwUgXJTKyd4/FIeZ4 -vfeQ/oY7XoRkNQ2yskyQWjSp8zImbfCsNOp7gHNgyPeUBPYVDU++m4u5B2c36KmV -Raxc6vXgEl8ExuvkZ3L0AM4vlG+Rj/nMRkMAjY4aA4LIHq63yP4Xsg+Noo5ZpHGO -wGpS+h3Ch5S3pbkCDQRR1Vj9ARAA2IZ4T9NMtm3vH/XjOa8z7q9GHhWlfh6i+7vh -EymfraFUTXu9O4JFOH7UiIVZvdRFP1yJCypZ0nEcTiiYlZMdJkk/9CcJiCoGMFmp -BndMuj4Hjp/lkOAXtKiNzUdw0gvaATdla5tB8U2OckXeeEuHov6aqX2Tgjw1/L1D -Q//72hv9l74BC4b5kpLgApv9A7oP6N4+idJLj0XpxsJlZDLSBAAAYcqYKBbMntfL -jrjr4oIw7Zrz7g3lNKHeDud6YxLnwzHuZfuD2ieD+/uvn957/TjFzF1yvNu8ip/7 -r6zL806TAJ/ndKm4SiuUo1+SiKjD30UGOqUYUj/H0nuVMp3HAGZSRA9VV4Um2b95 -2v6Qf+x8bdVz542CeIt2JAL8FR5MDmMco4ynqsmZ/Cw5Lc3zYSS0vbzO0J3FhY34 -RAy3BDf0TR/+NhipalF+Whu+OGRMs7V+XI0f2ttbc4R0X+0YGclWbf5urPZmMZ9Q -HAxfid4gP/aaHsEEgF7jACWNSrcBct4qUgfAFyg0c3xsgEjfvWvCAwrSyN4DGtDx -rA1hHPgbdZpTk4vFk4GpsPe3dLZ9uAjcteYG2f5zo6VVHIoORXKuHu6+2U3mRb3N -1MCuLWC8AN3aYM9JT4POVPI9eIxvkiXAb+eCVWjdt4LKB9QWLraA9H54Aw+yriv/ -KXL7LdcAEQEAAYkCHwQYAQgACQUCUdVY/QIbDAAKCRB9XYxgz00+tKtFEACw4W7Y -J/OYNtsyuC1fMFJLaGAP8wP4TrZ0NwTDqc7ke9LAERPVKZoQXsP0u32tBiwIRl7D -gDVvxudTPAN8z/cMxl3D+Pwcfqkm2MeN9As0yqXhp2BD+QIRJe0kAHTSWvoBoUBq -JAcDT2XhPGmu6Cgd+L6KuJlZqrflgIggNGedd5ownFFv0jR4JPt3gsqwMF3XGvEN -zVNR/4BCW5iijCZVqJy4IteQp6csw+ZT6Chd8M8+V2OlXIMqJaMMKrBedl2KcgtJ -aRGCjezQEryw/xwbgtjqGUb7nq7pGiUKnUA5d/qqw7blexiCxN1rM613WjZg/Ey1 -t4tkL7THBOl/HPW43ZM/QWQgzWA846nkydVl/VLRHf25FmfSvNvLcTGgFoeAUOKp -Q4Zg+6Iv8cUpxq0u3vbxNBXD/KVl2A5IGbb3e+ECUdL6BRyHoO4AYStAMdS3QhpZ -m5Ev9n1LzaG7fKiYPXgkvqwzup0UvKCihAoUjnEWlEOf7YihA3d2F1AojZa19ypW -DkwUow8PqggM2OqS4wgfFapGt8pAhYiCVUYqGqeVQYgFaSwQE44lVp34OFn4Vd1U -1CVH0gBsqr31C7fKUXoWlsnvZFGHg8Ibk3/pOsySGIeIBruprez+Rr0SloRZ9dHL -eSd1DTPMsgDW5tJg7mlOb8dGpXQ3FPUAVlKg57kCDQRR1VxFARAAvMCIkIxYhVv6 -vH1vxN4e9aRJ0Zf9uw1m4TAOspoRbwOAGLkGk8alI9rHMamF2is7SSLJYv5gTjmr -fHSfWBM9PgzelSEQEvqX4I8VoxhDdlzQ4oknjUpjaTk63eHG1+G1i+3CJWXqaL4y -93xarTPKBiYjbDckNKOZQeZ+aWgQrTMHKsVpR/f4g+0/RPkmmrn/7/5xWvkpg+5L 
-ehXc0B9ImSSEXco6i4/pGR3AvUWuFCKEQjiCM3tg7to2Hzj4REfkm9KXUpPC3pVp -Xg2NyO5rUZQi2F6FIVci2ntHpeNxO+c7C+u2wEpu7JF+FGLLzf0Zmeikpyi2DWCN -NzuxxoIiRdDAJZDGllzWgf2Yk8tN3Hbka0rbciRKug7pE9yc5WpYYN6fCxiU335c -Jl9cFrh8+OWE7Ec3mz4s0Dp3s+ykBWXPLXRKJxRseyMznYrpahvdWx+g3+0o/ZG+ -G/khINXQU2Uxesvm3t96GPTqLJosA2cS7+y9aRBERmJaIjN3jiehjVmgNgovw9hh -htnZ4fW3EIHKllaOx7izbBoHNSksaFnmT9qFOh1OqpwYran/maGKl5iP0EaTUadl -9gNeogQooMXxyeVTvAVfBqi8ie6FtPKwsqsqXINfSQKEJmNJzbj0Vj64zUaegSrK -zrVmxWEdiEqX7/BKnjav9G8ipgMHukcAEQEAAYkEPgQYAQgACQUCUdVcRQIbAgIp -CRB9XYxgz00+tMFdIAQZAQgABgUCUdVcRQAKCRDyy6XHj72D4alPD/9WgkDaUJDu -NdqO4zTN7fORZ3J7lMvnzvWGcWX9reLe/1ZaP7x8S9rbICNjRZVcf3+lZ2ipfuu0 -9eAEnoL3oyKPNYMNqj19lmQstlW7I3I1qQHTaxx65cgLnSaj86eY0NS9OH1YvXDr -PwQsx03D9V3fTfQbFDX3JJzdKu4MvHypl4POnukTr1xiZeYlr6uQf3a1yFBFCvEz -oQ2o307ZkAgTHZcAiDrUPkM6+ZPlJjetEmkYCl4a4+jWulCWDFZkv6NCTQFPkjHs -6Oe+/Zz3hdh8HuyBnbhKnRU/nnd8+My7hxRcpNHl+JEvVs1YEA9MZ3W3peRTN+hS -ei1qG+qLcMklemfhp0kzMnnidm2On/74Zv4YILGF8MVlC8tc8sl940ZyK4YRJU3j -GX2jUbqxv2iE1KcOEChHxNENyU43wjz3k6xBuUPAYB2oydrtLDMTqEH/0rv7PzYg -QomckLrN+PIPZa4xBR05E+fsy4fVm4dqQ+WUf7MLyWWGjMDzsLD3mXIsDy/ovNCp -e0xwukbUCqnedxVsWmWZfAYkxIFmfM65lyB6pVQnQn0EpVQoM6GN+db6xzlEw3nM -Bbsvte1MxTrdAUVyJJeIrjU8Pf3BpV+j3FX/+6uABltJcboo0BzLU2MyV9XRLwaH -WqSMgBreKDwlMgHndaEjYU9ldhezHrZ9ceXdD/9kjhuIgqyL0E/WoUTP2FXSxKdQ -i3yv4FGgFwGEb87MIGN6y2ebbPDUwUqRRwJOb1Kl+8DvtwQmtQ92VYfvb5eTE++H -PCcuRYF1ObVk5lsm9a6VQ5jjv4mxWXtg2FO+T14iywnip3QRYSZ0VUMBa3IQh7Ka -zPlt+JI9+rx2ZKT25NL+S6FT1seGHjTh2WukQ9yDMU5XDHqZbfcdwCxG1qxN2oof -fyJlsF5o+nWZhNcas/Ib8oqzSWwB1ODR+NSqajgrtBrVz5rczuCMwZ6Mo+YGdAw0 -viE9IrX56O45BFo2dt3e/1Vi4Vu0b8D0TfolvTQPEAYuFb+GZyy9h1ngPjxcsrKZ -g5rN5E+1gnhOwEab7cFnOBcecdBdGsbeIlkR4zUT5cdOc/ThINNoMIgCvTdgyYr+ -dY5DXgrtCKEN2bEhveDlCzhNeaHsHfewG18YWVrSm3SU1g/He/zkiVY4FCwM3pAu -kz8oAS55d1Kkr/8l753elvHZ/v28P+T/X9l4XJD8VIWP6clwDCZ9kc6vPGFNXiKh -nxG9rwUrcqQkaEnBbnw3QhdoTeTH5/aCnhdogHN0yX5elQwYVow36SeNx+IJ1P42 -2I7hkp8x/R0QD88klwGAv8FplZDOc/TNlRfGR38oNQE1neCN94+Bkyej++NjRix1 
-kLUbeNIoVx98FilMTg== -=GO5J +ti3yIjpHLhGrkwaMOUdAz/Fh43ZNcR8mI2PIF7Aj4eA+/B/dyua9DbQlSm9oYW5u +ZXMgU2NoYXVlciA8ai5zY2hhdWVyQGVtYWlsLmRlPokCNwQTAQgAIQUCUdVa4QIb +AwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAAKCRB9XYxgz00+tDVlD/9BqE0Kde9M +oLIBkE2pOtS59yZRQ+omL2twIbLx58cyH1WY+iTtgzctJU2nbbBs2aW/pMVTuSUg +qEnwM+BO/OM+ltFdwuVbZeO3eTLjkDKbArqnx49os4/TsvgLLzo+VbMRZcuo1C2g +yJ52/KlQ8pWIr1UoIpNKvIdFTCC96mYZMxiLffYfKnCCOz5SOoYy9atzOZ8CAZOo +iZSk+ofoR+XP85qWr5/myi6jUV8vzKJdPDDjsFrZhKZNVkDNhfox5epk+nxej+ga +nF8GPTKA/0IYxbo3Ofbf7ecIBwVtI7U9SzT2TExex++ZlTF3f4WjhzMNX17zD02x +HFc02GGSbdNzhZAYOZn4hM0qgMkZL7VR1ZT4x89qH58pnzv87VHT5Jfj1r++pMLO +sewbZAbmMj97EVHTXgZ2VSBza7s1nTd2Te1v/NNnkHIPg096/LdKIRSwZMbR/nN/ +ax5KCqZIVFCFvlPjEXPfCs5Djw93p8c07twG4O1HxnF6cms0LoUIuy86+4o5eq97 +RufcLwSLmu2k5ab4ez89+ZI87yD+70KnixzYA3WKUFmi/ClvGaMS3/N6+th03TkC +xiiYA9ZJyyKssO2WgiII3vFZup1dZkDFWjje5aBhtGyFJikWsPJjT203ZuxvqJ5Z +2S+driyy3Vc6kxdiTSs0sqTULKAdmlzSybQjSm9oYW5uZXMgU2NoYXVlciA8am9z +Y2hAZGViaWFuLm9yZz6JAjcEEwEIACEFAlTSH1oCGwMFCwkIBwMFFQoJCAsFFgID +AQACHgECF4AACgkQfV2MYM9NPrRBJxAAheF1zMIsWRSMtmZaTe0gr7f5pjcxaD2a +CaeVQg5PAlx+cLF0/AYVbFDVzAXQq+jefgnddqw897ZAL8SsCUIcTLurvO3q4qx1 +hg+kqhVzFyji3S49i3jlB4ozmA1VMZKGhqzLDN/UYIXwfvkLmp4TVw0KMy7Fmmrl +aIYcRexBsyf8u3r6OuorPJHRxxN2cIMWLEtXhOYSk8tOzMpKBgj5Ki4g6ORW5+3T +SBTBpwlZAuRb5cGmiiRdgc/R/gTGlMYJr2OB/s/38n1K8qvcS2JBXfN29MAJDeDR +kNKiI3Xe4S3f7bdcuPBuh45OFWM/vOt1jhsqzVR0EFb74ok4M/f3mnKeZzMlVnGd +XoePzx6Z4Ocva029jaD8utnl66Pd/6O8lqwpriDurnFQ495zchWssn0RCcv0CmkK +8qj9+ru6TOyAarBKbu9C1hDlIGkBaYlM/jVm+e+anoki8nbulWG4YHH5KykQWyW4 +KjqI1k3XItLZJzYUJv2xLmYlAo+5f/aWh5aNuorGOGaKg8FIFyUysnePxSHmeL33 +kP6GO16EZDUNsrJMkFo0qfMyJm3wrDTqe4BzYMj3lAT2FQ1PvpuLuQdnN+iplUWs +XOr14BJfBMbr5Gdy9ADOL5RvkY/5zEZDAI2OGgOCyB6ut8j+F7IPjaKOWaRxjsBq +UvodwoeUt6W5Ag0EUdVY/QEQANiGeE/TTLZt7x/14zmvM+6vRh4VpX4eovu74RMp +n62hVE17vTuCRTh+1IiFWb3URT9ciQsqWdJxHE4omJWTHSZJP/QnCYgqBjBZqQZ3 +TLo+B46f5ZDgF7Sojc1HcNIL2gE3ZWubQfFNjnJF3nhLh6L+mql9k4I8Nfy9Q0P/ 
++9ob/Ze+AQuG+ZKS4AKb/QO6D+jePonSS49F6cbCZWQy0gQAAGHKmCgWzJ7Xy464 +6+KCMO2a8+4N5TSh3g7nemMS58Mx7mX7g9ong/v7r5/ee/04xcxdcrzbvIqf+6+s +y/NOkwCf53SpuEorlKNfkoiow99FBjqlGFI/x9J7lTKdxwBmUkQPVVeFJtm/edr+ +kH/sfG3Vc+eNgniLdiQC/BUeTA5jHKOMp6rJmfwsOS3N82EktL28ztCdxYWN+EQM +twQ39E0f/jYYqWpRflobvjhkTLO1flyNH9rbW3OEdF/tGBnJVm3+bqz2ZjGfUBwM +X4neID/2mh7BBIBe4wAljUq3AXLeKlIHwBcoNHN8bIBI371rwgMK0sjeAxrQ8awN +YRz4G3WaU5OLxZOBqbD3t3S2fbgI3LXmBtn+c6OlVRyKDkVyrh7uvtlN5kW9zdTA +ri1gvADd2mDPSU+DzlTyPXiMb5IlwG/nglVo3beCygfUFi62gPR+eAMPsq4r/yly ++y3XABEBAAGJAh8EGAEIAAkFAlHVWP0CGwwACgkQfV2MYM9NPrSrRRAAsOFu2Cfz +mDbbMrgtXzBSS2hgD/MD+E62dDcEw6nO5HvSwBET1SmaEF7D9Lt9rQYsCEZew4A1 +b8bnUzwDfM/3DMZdw/j8HH6pJtjHjfQLNMql4adgQ/kCESXtJAB00lr6AaFAaiQH +A09l4TxprugoHfi+iriZWaq35YCIIDRnnXeaMJxRb9I0eCT7d4LKsDBd1xrxDc1T +Uf+AQluYoowmVaicuCLXkKenLMPmU+goXfDPPldjpVyDKiWjDCqwXnZdinILSWkR +go3s0BK8sP8cG4LY6hlG+56u6RolCp1AOXf6qsO25XsYgsTdazOtd1o2YPxMtbeL +ZC+0xwTpfxz1uN2TP0FkIM1gPOOp5MnVZf1S0R39uRZn0rzby3ExoBaHgFDiqUOG +YPuiL/HFKcatLt728TQVw/ylZdgOSBm293vhAlHS+gUch6DuAGErQDHUt0IaWZuR +L/Z9S82hu3yomD14JL6sM7qdFLygooQKFI5xFpRDn+2IoQN3dhdQKI2WtfcqVg5M +FKMPD6oIDNjqkuMIHxWqRrfKQIWIglVGKhqnlUGIBWksEBOOJVad+DhZ+FXdVNQl +R9IAbKq99Qu3ylF6FpbJ72RRh4PCG5N/6TrMkhiHiAa7qa3s/ka9EpaEWfXRy3kn +dQ0zzLIA1ubSYO5pTm/HRqV0NxT1AFZSoOe5Ag0EUdVcRQEQALzAiJCMWIVb+rx9 +b8TeHvWkSdGX/bsNZuEwDrKaEW8DgBi5BpPGpSPaxzGphdorO0kiyWL+YE45q3x0 +n1gTPT4M3pUhEBL6l+CPFaMYQ3Zc0OKJJ41KY2k5Ot3hxtfhtYvtwiVl6mi+Mvd8 +Wq0zygYmI2w3JDSjmUHmfmloEK0zByrFaUf3+IPtP0T5Jpq5/+/+cVr5KYPuS3oV +3NAfSJkkhF3KOouP6RkdwL1FrhQihEI4gjN7YO7aNh84+ERH5JvSl1KTwt6VaV4N +jcjua1GUIthehSFXItp7R6XjcTvnOwvrtsBKbuyRfhRiy839GZnopKcotg1gjTc7 +scaCIkXQwCWQxpZc1oH9mJPLTdx25GtK23IkSroO6RPcnOVqWGDenwsYlN9+XCZf +XBa4fPjlhOxHN5s+LNA6d7PspAVlzy10SicUbHsjM52K6Wob3VsfoN/tKP2Rvhv5 +ISDV0FNlMXrL5t7fehj06iyaLANnEu/svWkQREZiWiIzd44noY1ZoDYKL8PYYYbZ +2eH1txCBypZWjse4s2waBzUpLGhZ5k/ahTodTqqcGK2p/5mhipeYj9BGk1GnZfYD +XqIEKKDF8cnlU7wFXwaovInuhbTysLKrKlyDX0kChCZjSc249FY+uM1GnoEqys61 
+ZsVhHYhKl+/wSp42r/RvIqYDB7pHABEBAAGJBD4EGAEIAAkFAlHVXEUCGwICKQkQ +fV2MYM9NPrTBXSAEGQEIAAYFAlHVXEUACgkQ8sulx4+9g+GpTw//VoJA2lCQ7jXa +juM0ze3zkWdye5TL5871hnFl/a3i3v9WWj+8fEva2yAjY0WVXH9/pWdoqX7rtPXg +BJ6C96MijzWDDao9fZZkLLZVuyNyNakB02sceuXIC50mo/OnmNDUvTh9WL1w6z8E +LMdNw/Vd3030GxQ19ySc3SruDLx8qZeDzp7pE69cYmXmJa+rkH92tchQRQrxM6EN +qN9O2ZAIEx2XAIg61D5DOvmT5SY3rRJpGApeGuPo1rpQlgxWZL+jQk0BT5Ix7Ojn +vv2c94XYfB7sgZ24Sp0VP553fPjMu4cUXKTR5fiRL1bNWBAPTGd1t6XkUzfoUnot +ahvqi3DJJXpn4adJMzJ54nZtjp/++Gb+GCCxhfDFZQvLXPLJfeNGciuGESVN4xl9 +o1G6sb9ohNSnDhAoR8TRDclON8I895OsQblDwGAdqMna7SwzE6hB/9K7+z82IEKJ +nJC6zfjyD2WuMQUdORPn7MuH1ZuHakPllH+zC8llhozA87Cw95lyLA8v6LzQqXtM +cLpG1Aqp3ncVbFplmXwGJMSBZnzOuZcgeqVUJ0J9BKVUKDOhjfnW+sc5RMN5zAW7 +L7XtTMU63QFFciSXiK41PD39waVfo9xV//urgAZbSXG6KNAcy1NjMlfV0S8Gh1qk +jIAa3ig8JTIB53WhI2FPZXYXsx62fXHl3Q//ZI4biIKsi9BP1qFEz9hV0sSnUIt8 +r+BRoBcBhG/OzCBjestnm2zw1MFKkUcCTm9SpfvA77cEJrUPdlWH72+XkxPvhzwn +LkWBdTm1ZOZbJvWulUOY47+JsVl7YNhTvk9eIssJ4qd0EWEmdFVDAWtyEIeymsz5 +bfiSPfq8dmSk9uTS/kuhU9bHhh404dlrpEPcgzFOVwx6mW33HcAsRtasTdqKH38i +ZbBeaPp1mYTXGrPyG/KKs0lsAdTg0fjUqmo4K7Qa1c+a3M7gjMGejKPmBnQMNL4h +PSK1+ejuOQRaNnbd3v9VYuFbtG/A9E36Jb00DxAGLhW/hmcsvYdZ4D48XLKymYOa +zeRPtYJ4TsBGm+3BZzgXHnHQXRrG3iJZEeM1E+XHTnP04SDTaDCIAr03YMmK/nWO +Q14K7QihDdmxIb3g5Qs4TXmh7B33sBtfGFla0pt0lNYPx3v85IlWOBQsDN6QLpM/ +KAEueXdSpK//Je+d3pbx2f79vD/k/1/ZeFyQ/FSFj+nJcAwmfZHOrzxhTV4ioZ8R +va8FK3KkJGhJwW58N0IXaE3kx+f2gp4XaIBzdMl+XpUMGFaMN+knjcfiCdT+NtiO +4ZKfMf0dEA/PJJcBgL/BaZWQznP0zZUXxkd/KDUBNZ3gjfePgZMno/vjY0YsdZC1 +G3jSKFcffBYpTE4= +=UNfd -----END PGP PUBLIC KEY BLOCK----- -- cgit v1.2.3 From 0eaaf58cb887f9b2e628e14ff9de9b3169283457 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 10:59:01 +0100 Subject: debian/rules: run test.sh during build --- debian/control | 7 +++++++ debian/rules | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/debian/control b/debian/control index 9a37e63..120cab0 100644 --- a/debian/control +++ b/debian/control @@ -4,7 +4,14 
@@ Section: python Priority: optional Build-Depends: debhelper (>= 11), dh-python, + ghostscript <!nocheck>, help2man, + imagemagick <!nocheck>, + libimage-exiftool-perl <!nocheck>, + libtiff-tools <!nocheck>, + mupdf-tools <!nocheck>, + netpbm <!nocheck>, + poppler-utils <!nocheck>, python3-all, python3-pdfrw, python3-pil, diff --git a/debian/rules b/debian/rules index 76520dd..b525322 100755 --- a/debian/rules +++ b/debian/rules @@ -10,3 +10,9 @@ override_dh_auto_clean: override_dh_auto_build: dh_auto_build help2man --no-info --name="lossless conversion of raster images to pdf" ./src/img2pdf.py -o img2pdf.1 + +override_dh_auto_test: + dh_auto_test +ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS) $(DEB_BUILD_PROFILES))) + ./test.sh +endif -- cgit v1.2.3 From 084ce758d651b12edfb5eda74af9ae8ab41297da Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 12:49:15 +0100 Subject: add test.sh autopkgtest --- debian/tests/control | 4 ++++ debian/tests/test-sh | 3 +++ 2 files changed, 7 insertions(+) create mode 100755 debian/tests/test-sh diff --git a/debian/tests/control b/debian/tests/control index d46241f..ac75dd0 100644 --- a/debian/tests/control +++ b/debian/tests/control @@ -1,3 +1,7 @@ Tests: default Restrictions: allow-stderr Depends: @, python3-pdfrw + +Tests: test-sh +Restrictions: allow-stderr +Depends: @, ghostscript, imagemagick, libimage-exiftool-perl, libtiff-tools, mupdf-tools, netpbm, poppler-utils diff --git a/debian/tests/test-sh b/debian/tests/test-sh new file mode 100755 index 0000000..fbdf1be --- /dev/null +++ b/debian/tests/test-sh @@ -0,0 +1,3 @@ +#!/bin/sh + +img2pdfprog=/usr/bin/img2pdf sh -x ./test.sh -- cgit v1.2.3 From 3b5552c30a1a400249a1c28f82fedcdc039ddeb3 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 12:54:43 +0100 Subject: debian/control: bump Standards-Version to 4.3.0 --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 120cab0..9a5b6e1 100644 --- a/debian/control +++
b/debian/control @@ -16,7 +16,7 @@ Build-Depends: debhelper (>= 11), python3-pdfrw, python3-pil, python3-setuptools -Standards-Version: 3.9.8 +Standards-Version: 4.3.0 Vcs-Browser: https://browse.dgit.debian.org/img2pdf.git/ Vcs-Git: https://git.dgit.debian.org/img2pdf Homepage: https://gitlab.mister-muffin.de/josch/img2pdf -- cgit v1.2.3 From 59415c13b74b5f1d10387a7caa538aa27ff6baa7 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Mon, 7 Jan 2019 15:24:46 +0100 Subject: upload version 0.3.3-1 to unstable --- debian/changelog | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index 1c8e09e..8e38055 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,14 @@ -img2pdf (0.3.3-1) UNRELEASED; urgency=medium +img2pdf (0.3.3-1) unstable; urgency=medium * new upstream release - - -- Johannes 'josch' Schauer Mon, 07 Jan 2019 10:57:46 +0100 + * debian/control: run wrap-and-sort + * debian/upstream/signing-key.asc: export a more minimal key with + --export-options export-minimal,export-clean + * debian/rules: run test.sh during build + * add test.sh autopkgtest + * debian/control: bump Standards-Version to 4.3.0 + + -- Johannes 'josch' Schauer Mon, 07 Jan 2019 15:24:31 +0100 img2pdf (0.3.2-1) unstable; urgency=medium -- cgit v1.2.3