From 140ed3b81e844b06f82bb5819fe335b514b2aed4 Mon Sep 17 00:00:00 2001 From: Johannes 'josch' Schauer Date: Sun, 5 Apr 2020 20:23:55 +0200 Subject: Import Upstream version 0.3.3 --- CHANGES.rst | 133 ++ LICENSE | 165 ++ MANIFEST.in | 11 + PKG-INFO | 245 +++ README.md | 218 +++ setup.cfg | 7 + setup.py | 62 + src/img2pdf.egg-info/PKG-INFO | 245 +++ src/img2pdf.egg-info/SOURCES.txt | 35 + src/img2pdf.egg-info/dependency_links.txt | 1 + src/img2pdf.egg-info/entry_points.txt | 4 + src/img2pdf.egg-info/pbr.json | 1 + src/img2pdf.egg-info/requires.txt | 4 + src/img2pdf.egg-info/top_level.txt | 2 + src/img2pdf.egg-info/zip-safe | 1 + src/img2pdf.py | 2697 +++++++++++++++++++++++++++++ src/jp2.py | 125 ++ src/tests/__init__.py | 732 ++++++++ src/tests/input/CMYK.jpg | Bin 0 -> 4788 bytes src/tests/input/CMYK.tif | Bin 0 -> 22286 bytes src/tests/input/animation.gif | Bin 0 -> 1930 bytes src/tests/input/gray.png | Bin 0 -> 814 bytes src/tests/input/mono.png | Bin 0 -> 444 bytes src/tests/input/mono.tif | Bin 0 -> 720 bytes src/tests/input/normal.jpg | Bin 0 -> 2348 bytes src/tests/input/normal.png | Bin 0 -> 4992 bytes src/tests/output/CMYK.jpg.pdf | Bin 0 -> 5558 bytes src/tests/output/CMYK.tif.pdf | Bin 0 -> 1722 bytes src/tests/output/animation.gif.pdf | Bin 0 -> 6070 bytes src/tests/output/gray.png.pdf | Bin 0 -> 1329 bytes src/tests/output/mono.png.pdf | Bin 0 -> 958 bytes src/tests/output/mono.tif.pdf | Bin 0 -> 915 bytes src/tests/output/normal.jpg.pdf | Bin 0 -> 3089 bytes src/tests/output/normal.png.pdf | Bin 0 -> 1670 bytes test.sh | 1468 ++++++++++++++++ test_comp.sh | 32 + 36 files changed, 6188 insertions(+) create mode 100644 CHANGES.rst create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 PKG-INFO create mode 100644 README.md create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 src/img2pdf.egg-info/PKG-INFO create mode 100644 src/img2pdf.egg-info/SOURCES.txt create mode 100644 
src/img2pdf.egg-info/dependency_links.txt create mode 100644 src/img2pdf.egg-info/entry_points.txt create mode 100644 src/img2pdf.egg-info/pbr.json create mode 100644 src/img2pdf.egg-info/requires.txt create mode 100644 src/img2pdf.egg-info/top_level.txt create mode 100644 src/img2pdf.egg-info/zip-safe create mode 100755 src/img2pdf.py create mode 100644 src/jp2.py create mode 100644 src/tests/__init__.py create mode 100644 src/tests/input/CMYK.jpg create mode 100644 src/tests/input/CMYK.tif create mode 100644 src/tests/input/animation.gif create mode 100644 src/tests/input/gray.png create mode 100644 src/tests/input/mono.png create mode 100644 src/tests/input/mono.tif create mode 100644 src/tests/input/normal.jpg create mode 100644 src/tests/input/normal.png create mode 100644 src/tests/output/CMYK.jpg.pdf create mode 100644 src/tests/output/CMYK.tif.pdf create mode 100644 src/tests/output/animation.gif.pdf create mode 100644 src/tests/output/gray.png.pdf create mode 100644 src/tests/output/mono.png.pdf create mode 100644 src/tests/output/mono.tif.pdf create mode 100644 src/tests/output/normal.jpg.pdf create mode 100644 src/tests/output/normal.png.pdf create mode 100755 test.sh create mode 100755 test_comp.sh diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..a9ab56b --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,133 @@ +======= +CHANGES +======= + +0.3.3 (2019-01-07) +------------------ + + - restore basic support for Python 2 + - also ship test.sh + - add legal and tabloid paper formats + - respect exif rotation tag + +0.3.2 (2018-11-20) +------------------ + + - support big endian TIFF with lsb-to-msb FillOrder + - support multipage CCITT Group 4 TIFF + - also reject palette images with transparency + - support PNG images with 1, 2, 4 or 16 bits per sample + - support multipage TIFF with differently encoded images + - support CCITT Group4 TIFF without rows-per-strip + - add extensive test suite + +0.3.1 (2018-08-04) +------------------ + + - 
Directly copy data from CCITT Group 4 encoded TIFF images into the PDF + container without re-encoding + +0.3.0 (2018-06-18) +------------------ + + - Store non-jpeg images using PNG compression + - Support arbitrarily large pages via PDF /UserUnit field + - Disallow input with alpha channel as it cannot be preserved + - Add option --pillow-limit-break to support very large input + +0.2.4 (2017-05-23) +------------------ + + - Restore support for Python 2.7 + - Add support for PyPy + - Add support for testing using tox + +0.2.3 (2017-01-20) +------------------ + + - version number bump for botched pypi upload... + +0.2.2 (2017-01-20) +------------------ + + - automatic monochrome CCITT Group4 encoding via Pillow/libtiff + +0.2.1 (2016-05-04) +------------------ + + - set img2pdf as /producer value + - support multi-frame images like multipage TIFF and animated GIF + - support for palette images like GIF + - support all colorspaces and imageformats known by PIL + - read horizontal and vertical dpi from JPEG2000 files + +0.2.0 (2015-05-10) +------------------ + + - now Python3 only + - pep8 compliant code + - update my email to josch@mister-muffin.de + - move from github to gitlab.mister-muffin.de/josch/img2pdf + - use logging module + - add extensive test suite + - ability to read from standard input + - pdf writer: + - make more compatible with the interface of pdfrw module + - print floats which equal to their integer conversion as integer + - do not print trailing zeroes for floating point numbers + - print more linebreaks + - add binary string at beginning of PDF to indicate that the PDF + contains binary data + - handle datetime and unicode strings by using utf-16-be encoding + - new options (see --help for more details): + - --without-pdfrw + - --imgsize + - --border + - --fit + - --auto-orient + - --viewer-panes + - --viewer-initial-page + - --viewer-magnification + - --viewer-page-layout + - --viewer-fit-window + - --viewer-center-window + - 
--viewer-fullscreen + - remove short options for metadata command line arguments + - correctly encode and escape non-ascii metadata + - explicitly store date in UTC and allow parsing all date formats understood + by dateutil and `date --date` + +0.1.5 (2015-02-16) +------------------ + +- Enable support for CMYK images +- Rework test suite +- support file objects as input + +0.1.4 (2015-01-21) +------------------ + +- add Python 3 support +- make output reproducible by sorting and --nodate option + +0.1.3 (2014-11-10) +------------------ + +- Avoid leaking file descriptors +- Convert unrecognized colorspaces to RGB + +0.1.1 (2014-09-07) +------------------ + +- allow running src/img2pdf.py standalone +- license change from GPL to LGPL +- Add pillow 2.4.0 support +- add options to specify pdf dimensions in points + +0.1.0 (2014-03-14, unreleased) +------------------ + +- Initial PyPI release. +- Modified code to create proper package. +- Added tests. +- Added console script entry point. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0a04128 --- /dev/null +++ b/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. 
+ + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. 
+ + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. 
A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. 
+ + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..d86af25 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,11 @@ +include README.md +include test_comp.sh +include test.sh +include CHANGES.rst +include LICENSE +recursive-include src *.jpg +recursive-include src *.pdf +recursive-include src *.png +recursive-include src *.tif +recursive-include src *.gif +recursive-include src *.py diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..7553591 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,245 @@ +Metadata-Version: 2.1 +Name: img2pdf +Version: 0.3.3 +Summary: Convert images to PDF via direct JPEG inclusion. +Home-page: https://gitlab.mister-muffin.de/josch/img2pdf +Author: Johannes 'josch' Schauer +Author-email: josch@mister-muffin.de +License: LGPL +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.3 +Description: img2pdf + ======= + + Lossless conversion of raster images to PDF. You should use img2pdf if your + priorities are (in this order): + + 1. 
**always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. **small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + + Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. not be fast because input data gets re-encoded + + Another advantage of not having to re-encode the input (in most common + situations) is, that img2pdf is able to handle much larger input than other + software, because the raw pixel data never has to be loaded into memory. + + The following table shows how img2pdf handles different input depending on the + input file format and image color space. + + | Format | Colorspace | Result | + | -------------------- | ------------------------------ | ------------- | + | JPEG | any | direct | + | JPEG2000 | any | direct | + | PNG (non-interlaced) | any | direct | + | TIFF (CCITT Group 4) | monochrome | direct | + | any | any except CMYK and monochrome | PNG Paeth | + | any | monochrome | CCITT Group 4 | + | any | CMYK | flate | + + For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 + encoded data, img2pdf directly embeds the image data into the PDF without + re-encoding it. It thus treats the PDF format merely as a container format for + the image data. In these cases, img2pdf only increases the filesize by the size + of the PDF container (typically around 500 to 700 bytes). Since data is only + copied and not re-encoded, img2pdf is also typically faster than other + solutions for these input formats. 
+ + For all other input types, img2pdf first has to transform the pixel data to + make it compatible with PDF. In most cases, the PNG Paeth filter is applied to + the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for + CMYK input no filter is applied before finally applying flate compression. + + Usage + ----- + + The images must be provided as files because img2pdf needs to seek in the file + descriptor. + + If no output file is specified with the `-o`/`--output` option, output will be + done to stdout. A typical invocation is: + + $ img2pdf img1.png img2.jpg -o out.pdf + + The detailed documentation can be accessed by running: + + $ img2pdf --help + + Bugs + ---- + + - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF cannot be read by the Adobe Acrobat Reader, + please contact me. + + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. + + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. But img2pdf will always be lossless and thus, input images must + not carry transparency information. + + - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images. 
+ + Installation + ------------ + + On Debian- and Ubuntu-based systems, img2pdf can be installed from the + official repositories: + + $ apt install img2pdf + + If you want to install it using pip, you can run: + + $ pip3 install img2pdf + + If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip3 install . + + To test the console script without installing the package on your system, + use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip3 install . + + You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/input/normal.jpg + + The package can also be used as a library: + + import img2pdf + + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...")) + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + + Comparison to ImageMagick + ------------------------- + + Create a large test image: + + $ convert logo: -resize 8000x original.jpg + + Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + + Notice how ImageMagick took an order of magnitude longer to do the conversion + than img2pdf. 
It also used twice the memory. + + Now extract the image data from both PDF documents and compare it to the + original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + + To get lossless output with ImageMagick we can use Zip compression but that + unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + + Comparison to pdfLaTeX + ---------------------- + + pdfLaTeX performs a lossless conversion from included images to PDF by default. + If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same + way as img2pdf does it. But for other image formats it uses flate compression + of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + + Comparison to Tesseract OCR + --------------------------- + + Tesseract OCR comes closest to the functionality img2pdf provides. It is able + to convert JPEG and PNG input to PDF without needlessly increasing the filesize + and is at the same time lossless. So if your input is JPEG and PNG images, then + you should safely be able to use Tesseract instead of img2pdf. For other input, + Tesseract might not do a lossless conversion. For example it converts CMYK + input to RGB and removes the alpha channel from images with transparency. 
For + multipage TIFF or animated GIF, it will only convert the first frame. + +Keywords: jpeg pdf converter +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Other Audience +Classifier: Environment :: Console +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) +Classifier: Natural Language :: English +Classifier: Operating System :: OS Independent +Provides-Extra: test diff --git a/README.md b/README.md new file mode 100644 index 0000000..ef25643 --- /dev/null +++ b/README.md @@ -0,0 +1,218 @@ +img2pdf +======= + +Lossless conversion of raster images to PDF. You should use img2pdf if your +priorities are (in this order): + + 1. **always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. **small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + +Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. 
not be fast because input data gets re-encoded + +Another advantage of not having to re-encode the input (in most common +situations) is, that img2pdf is able to handle much larger input than other +software, because the raw pixel data never has to be loaded into memory. + +The following table shows how img2pdf handles different input depending on the +input file format and image color space. + +| Format | Colorspace | Result | +| -------------------- | ------------------------------ | ------------- | +| JPEG | any | direct | +| JPEG2000 | any | direct | +| PNG (non-interlaced) | any | direct | +| TIFF (CCITT Group 4) | monochrome | direct | +| any | any except CMYK and monochrome | PNG Paeth | +| any | monochrome | CCITT Group 4 | +| any | CMYK | flate | + +For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 +encoded data, img2pdf directly embeds the image data into the PDF without +re-encoding it. It thus treats the PDF format merely as a container format for +the image data. In these cases, img2pdf only increases the filesize by the size +of the PDF container (typically around 500 to 700 bytes). Since data is only +copied and not re-encoded, img2pdf is also typically faster than other +solutions for these input formats. + +For all other input types, img2pdf first has to transform the pixel data to +make it compatible with PDF. In most cases, the PNG Paeth filter is applied to +the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for +CMYK input no filter is applied before finally applying flate compression. + +Usage +----- + +The images must be provided as files because img2pdf needs to seek in the file +descriptor. + +If no output file is specified with the `-o`/`--output` option, output will be +done to stdout. 
A typical invocation is: + + $ img2pdf img1.png img2.jpg -o out.pdf + +The detailed documentation can be accessed by running: + + $ img2pdf --help + +Bugs +---- + + - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF cannot be read by the Adobe Acrobat Reader, + please contact me. + + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. + + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. But img2pdf will always be lossless and thus, input images must + not carry transparency information. + + - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images. + +Installation +------------ + +On Debian- and Ubuntu-based systems, img2pdf can be installed from the +official repositories: + + $ apt install img2pdf + +If you want to install it using pip, you can run: + + $ pip3 install img2pdf + +If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip3 install . + +To test the console script without installing the package on your system, +use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip3 install . 
+ +You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/input/normal.jpg + +The package can also be used as a library: + + import img2pdf + + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...")) + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + +Comparison to ImageMagick +------------------------- + +Create a large test image: + + $ convert logo: -resize 8000x original.jpg + +Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + +Notice how ImageMagick took an order of magnitude longer to do the conversion +than img2pdf. It also used twice the memory. 
+ +Now extract the image data from both PDF documents and compare it to the +original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + +To get lossless output with ImageMagick we can use Zip compression but that +unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + +Comparison to pdfLaTeX +---------------------- + +pdfLaTeX performs a lossless conversion from included images to PDF by default. +If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same +way as img2pdf does it. But for other image formats it uses flate compression +of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + +Comparison to Tesseract OCR +--------------------------- + +Tesseract OCR comes closest to the functionality img2pdf provides. It is able +to convert JPEG and PNG input to PDF without needlessly increasing the filesize +and is at the same time lossless. So if your input is JPEG and PNG images, then +you should safely be able to use Tesseract instead of img2pdf. For other input, +Tesseract might not do a lossless conversion. For example it converts CMYK +input to RGB and removes the alpha channel from images with transparency. 
For +multipage TIFF or animated GIF, it will only convert the first frame. diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..9f88734 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ +[metadata] +description-file = README.md + +[egg_info] +tag_build = +tag_date = 0 + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b0438fe --- /dev/null +++ b/setup.py @@ -0,0 +1,62 @@ +import sys +from setuptools import setup + +PY3 = sys.version_info[0] >= 3 + +VERSION = "0.3.3" + +INSTALL_REQUIRES = ( + 'Pillow', +) + +TESTS_REQUIRE = ( + 'pdfrw', +) + +if not PY3: + INSTALL_REQUIRES += ('enum34',) + + +setup( + name='img2pdf', + version=VERSION, + author="Johannes 'josch' Schauer", + author_email='josch@mister-muffin.de', + description="Convert images to PDF via direct JPEG inclusion.", + long_description=open('README.md').read(), + license="LGPL", + keywords="jpeg pdf converter", + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Other Audience', + 'Environment :: Console', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: Implementation :: CPython', + "Programming Language :: Python :: Implementation :: PyPy", + 'License :: OSI Approved :: GNU Lesser General Public License v3 ' + '(LGPLv3)', + 'Natural Language :: English', + 'Operating System :: OS Independent'], + url='https://gitlab.mister-muffin.de/josch/img2pdf', + download_url='https://gitlab.mister-muffin.de/josch/img2pdf/repository/' + 'archive.tar.gz?ref=' + VERSION, + package_dir={"": "src"}, + py_modules=['img2pdf', 'jp2'], + include_package_data=True, + test_suite='tests.test_suite', + zip_safe=True, + install_requires=INSTALL_REQUIRES, + tests_requires=TESTS_REQUIRE, + extras_require={ + 'test': TESTS_REQUIRE, + }, + 
entry_points=''' + [console_scripts] + img2pdf = img2pdf:main + ''', + ) diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO new file mode 100644 index 0000000..7553591 --- /dev/null +++ b/src/img2pdf.egg-info/PKG-INFO @@ -0,0 +1,245 @@ +Metadata-Version: 2.1 +Name: img2pdf +Version: 0.3.3 +Summary: Convert images to PDF via direct JPEG inclusion. +Home-page: https://gitlab.mister-muffin.de/josch/img2pdf +Author: Johannes 'josch' Schauer +Author-email: josch@mister-muffin.de +License: LGPL +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.3.3 +Description: img2pdf + ======= + + Lossless conversion of raster images to PDF. You should use img2pdf if your + priorities are (in this order): + + 1. **always lossless**: the image embedded in the PDF will always have the + exact same color information for every pixel as the input + 2. **small**: if possible, the difference in filesize between the input image + and the output PDF will only be the overhead of the PDF container itself + 3. **fast**: if possible, the input image is just pasted into the PDF document + as-is without any CPU hungry re-encoding of the pixel data + + Conventional conversion software (like ImageMagick) would either: + + 1. not be lossless because lossy re-encoding to JPEG + 2. not be small because using wasteful flate encoding of raw pixel data + 3. not be fast because input data gets re-encoded + + Another advantage of not having to re-encode the input (in most common + situations) is, that img2pdf is able to handle much larger input than other + software, because the raw pixel data never has to be loaded into memory. + + The following table shows how img2pdf handles different input depending on the + input file format and image color space. 
+ + | Format | Colorspace | Result | + | -------------------- | ------------------------------ | ------------- | + | JPEG | any | direct | + | JPEG2000 | any | direct | + | PNG (non-interlaced) | any | direct | + | TIFF (CCITT Group 4) | monochrome | direct | + | any | any except CMYK and monochrome | PNG Paeth | + | any | monochrome | CCITT Group 4 | + | any | CMYK | flate | + + For JPEG, JPEG2000, non-interlaced PNG and TIFF images with CCITT Group 4 + encoded data, img2pdf directly embeds the image data into the PDF without + re-encoding it. It thus treats the PDF format merely as a container format for + the image data. In these cases, img2pdf only increases the filesize by the size + of the PDF container (typically around 500 to 700 bytes). Since data is only + copied and not re-encoded, img2pdf is also typically faster than other + solutions for these input formats. + + For all other input types, img2pdf first has to transform the pixel data to + make it compatible with PDF. In most cases, the PNG Paeth filter is applied to + the pixel data. For monochrome input, CCITT Group 4 is used instead. Only for + CMYK input no filter is applied before finally applying flate compression. + + Usage + ----- + + The images must be provided as files because img2pdf needs to seek in the file + descriptor. + + If no output file is specified with the `-o`/`--output` option, output will be + done to stdout. A typical invocation is: + + $ img2pdf img1.png img2.jpg -o out.pdf + + The detailed documentation can be accessed by running: + + $ img2pdf --help + + Bugs + ---- + + - If you find a JPEG, JPEG2000, PNG or CCITT Group 4 encoded TIFF file that, + when embedded into the PDF cannot be read by the Adobe Acrobat Reader, + please contact me. + + - I have not yet figured out how to determine the colorspace of JPEG2000 + files. Therefore JPEG2000 files use DeviceRGB by default. 
For JPEG2000 + files with other colorspaces, you must explicitly specify it using the + `--colorspace` option. + + - Input images with alpha channels are not allowed. PDF doesn't support alpha + channels in images and thus, the alpha channel of the input would have to be + discarded. But img2pdf will always be lossless and thus, input images must + not carry transparency information. + + - img2pdf uses PIL (or Pillow) to obtain image meta data and to convert the + input if necessary. To prevent decompression bomb denial of service attacks, + Pillow limits the maximum number of pixels an input image is allowed to + have. If you are sure that you know what you are doing, then you can disable + this safeguard by passing the `--pillow-limit-break` option to img2pdf. This + allows one to process even very large input images. + + Installation + ------------ + + On Debian- and Ubuntu-based systems, img2pdf can be installed from the + official repositories: + + $ apt install img2pdf + + If you want to install it using pip, you can run: + + $ pip3 install img2pdf + + If you prefer to install from source code use: + + $ cd img2pdf/ + $ pip3 install . + + To test the console script without installing the package on your system, + use virtualenv: + + $ cd img2pdf/ + $ virtualenv ve + $ ve/bin/pip3 install . 
+ + You can then test the converter using: + + $ ve/bin/img2pdf -o test.pdf src/tests/test.jpg + + The package can also be used as a library: + + import img2pdf + + # opening from filename + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg')) + + # opening from file handle + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + f1.write(img2pdf.convert(f2)) + + # using in-memory image data + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("\x89PNG...")) + + # multiple inputs (variant 1) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert("test1.jpg", "test2.png")) + + # multiple inputs (variant 2) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert(["test1.jpg", "test2.png"])) + + # writing to file descriptor + with open("name.pdf","wb") as f1, open("test.jpg") as f2: + img2pdf.convert(f2, outputstream=f1) + + # specify paper size (A4) + a4inpt = (img2pdf.mm_to_pt(210),img2pdf.mm_to_pt(297)) + layout_fun = img2pdf.get_layout_fun(a4inpt) + with open("name.pdf","wb") as f: + f.write(img2pdf.convert('test.jpg', layout_fun=layout_fun)) + + Comparison to ImageMagick + ------------------------- + + Create a large test image: + + $ convert logo: -resize 8000x original.jpg + + Convert it into PDF using ImageMagick and img2pdf: + + $ time img2pdf original.jpg -o img2pdf.pdf + $ time convert original.jpg imagemagick.pdf + + Notice how ImageMagick took an order of magnitude longer to do the conversion + than img2pdf. It also used twice the memory. 
+ + Now extract the image data from both PDF documents and compare it to the + original: + + $ pdfimages -all img2pdf.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 0 + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.jpg null: + 118716 + + To get lossless output with ImageMagick we can use Zip compression but that + unnecessarily increases the size of the output: + + $ convert original.jpg -compress Zip imagemagick.pdf + $ pdfimages -all imagemagick.pdf tmp + $ compare -metric AE original.jpg tmp-000.png null: + 0 + $ stat --format="%s %n" original.jpg img2pdf.pdf imagemagick.pdf + 1535837 original.jpg + 1536683 img2pdf.pdf + 9397809 imagemagick.pdf + + Comparison to pdfLaTeX + ---------------------- + + pdfLaTeX performs a lossless conversion from included images to PDF by default. + If the input is a JPEG, then it simply embeds the JPEG into the PDF in the same + way as img2pdf does it. But for other image formats it uses flate compression + of the plain pixel data and thus needlessly increases the output file size: + + $ convert logo: -resize 8000x original.png + $ cat << END > pdflatex.tex + \documentclass{article} + \usepackage{graphicx} + \begin{document} + \includegraphics{original.png} + \end{document} + END + $ pdflatex pdflatex.tex + $ stat --format="%s %n" original.png pdflatex.pdf + 4500182 original.png + 9318120 pdflatex.pdf + + Comparison to Tesseract OCR + --------------------------- + + Tesseract OCR comes closest to the functionality img2pdf provides. It is able + to convert JPEG and PNG input to PDF without needlessly increasing the filesize + and is at the same time lossless. So if your input is JPEG and PNG images, then + you should safely be able to use Tesseract instead of img2pdf. For other input, + Tesseract might not do a lossless conversion. For example it converts CMYK + input to RGB and removes the alpha channel from images with transparency. 
For + multipage TIFF or animated GIF, it will only convert the first frame. + +Keywords: jpeg pdf converter +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Other Audience +Classifier: Environment :: Console +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: Implementation :: CPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) +Classifier: Natural Language :: English +Classifier: Operating System :: OS Independent +Provides-Extra: test diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt new file mode 100644 index 0000000..6fa068a --- /dev/null +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -0,0 +1,35 @@ +CHANGES.rst +LICENSE +MANIFEST.in +README.md +setup.cfg +setup.py +test.sh +test_comp.sh +src/img2pdf.py +src/jp2.py +src/img2pdf.egg-info/PKG-INFO +src/img2pdf.egg-info/SOURCES.txt +src/img2pdf.egg-info/dependency_links.txt +src/img2pdf.egg-info/entry_points.txt +src/img2pdf.egg-info/pbr.json +src/img2pdf.egg-info/requires.txt +src/img2pdf.egg-info/top_level.txt +src/img2pdf.egg-info/zip-safe +src/tests/__init__.py +src/tests/input/CMYK.jpg +src/tests/input/CMYK.tif +src/tests/input/animation.gif +src/tests/input/gray.png +src/tests/input/mono.png +src/tests/input/mono.tif +src/tests/input/normal.jpg +src/tests/input/normal.png +src/tests/output/CMYK.jpg.pdf +src/tests/output/CMYK.tif.pdf +src/tests/output/animation.gif.pdf +src/tests/output/gray.png.pdf +src/tests/output/mono.png.pdf +src/tests/output/mono.tif.pdf +src/tests/output/normal.jpg.pdf +src/tests/output/normal.png.pdf \ No 
newline at end of file diff --git a/src/img2pdf.egg-info/dependency_links.txt b/src/img2pdf.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/img2pdf.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/img2pdf.egg-info/entry_points.txt b/src/img2pdf.egg-info/entry_points.txt new file mode 100644 index 0000000..59301dc --- /dev/null +++ b/src/img2pdf.egg-info/entry_points.txt @@ -0,0 +1,4 @@ + + [console_scripts] + img2pdf = img2pdf:main + \ No newline at end of file diff --git a/src/img2pdf.egg-info/pbr.json b/src/img2pdf.egg-info/pbr.json new file mode 100644 index 0000000..bc27bf9 --- /dev/null +++ b/src/img2pdf.egg-info/pbr.json @@ -0,0 +1 @@ +{"is_release": false, "git_version": "d78b2cb"} \ No newline at end of file diff --git a/src/img2pdf.egg-info/requires.txt b/src/img2pdf.egg-info/requires.txt new file mode 100644 index 0000000..3a24589 --- /dev/null +++ b/src/img2pdf.egg-info/requires.txt @@ -0,0 +1,4 @@ +Pillow + +[test] +pdfrw diff --git a/src/img2pdf.egg-info/top_level.txt b/src/img2pdf.egg-info/top_level.txt new file mode 100644 index 0000000..0636fd7 --- /dev/null +++ b/src/img2pdf.egg-info/top_level.txt @@ -0,0 +1,2 @@ +img2pdf +jp2 diff --git a/src/img2pdf.egg-info/zip-safe b/src/img2pdf.egg-info/zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/img2pdf.egg-info/zip-safe @@ -0,0 +1 @@ + diff --git a/src/img2pdf.py b/src/img2pdf.py new file mode 100755 index 0000000..27e5b8c --- /dev/null +++ b/src/img2pdf.py @@ -0,0 +1,2697 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright (C) 2012-2014 Johannes 'josch' Schauer +# +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation, either +# version 3 of the License, or (at your option) any later +# version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program. If not, see +# . + +import sys +import os +import zlib +import argparse +from PIL import Image, TiffImagePlugin + +# TiffImagePlugin.DEBUG = True +from PIL.ExifTags import TAGS +from datetime import datetime +from jp2 import parsejp2 +from enum import Enum +from io import BytesIO +import logging +import struct + +PY3 = sys.version_info[0] >= 3 + +__version__ = "0.3.3" +default_dpi = 96.0 +papersizes = { + "letter": "8.5inx11in", + "a0": "841mmx1189mm", + "a1": "594mmx841mm", + "a2": "420mmx594mm", + "a3": "297mmx420mm", + "a4": "210mmx297mm", + "a5": "148mmx210mm", + "a6": "105mmx148mm", + "legal": "8.5inx14in", + "tabloid": "11inx17in", +} +papernames = { + "letter": "Letter", + "a0": "A0", + "a1": "A1", + "a2": "A2", + "a3": "A3", + "a4": "A4", + "a5": "A5", + "a6": "A6", + "legal": "Legal", + "tabloid": "Tabloid", +} + + +FitMode = Enum("FitMode", "into fill exact shrink enlarge") + +PageOrientation = Enum("PageOrientation", "portrait landscape") + +Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other") + +ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF other") + +PageMode = Enum("PageMode", "none outlines thumbs") + +PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft") + +Magnification = Enum("Magnification", "fit fith fitbh") + +ImgSize = Enum("ImgSize", "abs perc dpi") + +Unit = Enum("Unit", "pt cm mm inch") + +ImgUnit = Enum("ImgUnit", "pt cm mm inch perc dpi") + +TIFFBitRevTable = [ + 0x00, + 0x80, + 0x40, + 0xC0, + 0x20, + 0xA0, + 0x60, + 0xE0, + 0x10, + 0x90, + 0x50, + 0xD0, + 0x30, + 0xB0, + 0x70, + 0xF0, + 0x08, + 0x88, + 0x48, + 0xC8, + 0x28, + 
0xA8, + 0x68, + 0xE8, + 0x18, + 0x98, + 0x58, + 0xD8, + 0x38, + 0xB8, + 0x78, + 0xF8, + 0x04, + 0x84, + 0x44, + 0xC4, + 0x24, + 0xA4, + 0x64, + 0xE4, + 0x14, + 0x94, + 0x54, + 0xD4, + 0x34, + 0xB4, + 0x74, + 0xF4, + 0x0C, + 0x8C, + 0x4C, + 0xCC, + 0x2C, + 0xAC, + 0x6C, + 0xEC, + 0x1C, + 0x9C, + 0x5C, + 0xDC, + 0x3C, + 0xBC, + 0x7C, + 0xFC, + 0x02, + 0x82, + 0x42, + 0xC2, + 0x22, + 0xA2, + 0x62, + 0xE2, + 0x12, + 0x92, + 0x52, + 0xD2, + 0x32, + 0xB2, + 0x72, + 0xF2, + 0x0A, + 0x8A, + 0x4A, + 0xCA, + 0x2A, + 0xAA, + 0x6A, + 0xEA, + 0x1A, + 0x9A, + 0x5A, + 0xDA, + 0x3A, + 0xBA, + 0x7A, + 0xFA, + 0x06, + 0x86, + 0x46, + 0xC6, + 0x26, + 0xA6, + 0x66, + 0xE6, + 0x16, + 0x96, + 0x56, + 0xD6, + 0x36, + 0xB6, + 0x76, + 0xF6, + 0x0E, + 0x8E, + 0x4E, + 0xCE, + 0x2E, + 0xAE, + 0x6E, + 0xEE, + 0x1E, + 0x9E, + 0x5E, + 0xDE, + 0x3E, + 0xBE, + 0x7E, + 0xFE, + 0x01, + 0x81, + 0x41, + 0xC1, + 0x21, + 0xA1, + 0x61, + 0xE1, + 0x11, + 0x91, + 0x51, + 0xD1, + 0x31, + 0xB1, + 0x71, + 0xF1, + 0x09, + 0x89, + 0x49, + 0xC9, + 0x29, + 0xA9, + 0x69, + 0xE9, + 0x19, + 0x99, + 0x59, + 0xD9, + 0x39, + 0xB9, + 0x79, + 0xF9, + 0x05, + 0x85, + 0x45, + 0xC5, + 0x25, + 0xA5, + 0x65, + 0xE5, + 0x15, + 0x95, + 0x55, + 0xD5, + 0x35, + 0xB5, + 0x75, + 0xF5, + 0x0D, + 0x8D, + 0x4D, + 0xCD, + 0x2D, + 0xAD, + 0x6D, + 0xED, + 0x1D, + 0x9D, + 0x5D, + 0xDD, + 0x3D, + 0xBD, + 0x7D, + 0xFD, + 0x03, + 0x83, + 0x43, + 0xC3, + 0x23, + 0xA3, + 0x63, + 0xE3, + 0x13, + 0x93, + 0x53, + 0xD3, + 0x33, + 0xB3, + 0x73, + 0xF3, + 0x0B, + 0x8B, + 0x4B, + 0xCB, + 0x2B, + 0xAB, + 0x6B, + 0xEB, + 0x1B, + 0x9B, + 0x5B, + 0xDB, + 0x3B, + 0xBB, + 0x7B, + 0xFB, + 0x07, + 0x87, + 0x47, + 0xC7, + 0x27, + 0xA7, + 0x67, + 0xE7, + 0x17, + 0x97, + 0x57, + 0xD7, + 0x37, + 0xB7, + 0x77, + 0xF7, + 0x0F, + 0x8F, + 0x4F, + 0xCF, + 0x2F, + 0xAF, + 0x6F, + 0xEF, + 0x1F, + 0x9F, + 0x5F, + 0xDF, + 0x3F, + 0xBF, + 0x7F, + 0xFF, +] + + +class NegativeDimensionError(Exception): + pass + + +class UnsupportedColorspaceError(Exception): + pass + + 
+class ImageOpenError(Exception): + pass + + +class JpegColorspaceError(Exception): + pass + + +class PdfTooLargeError(Exception): + pass + + +# without pdfrw this function is a no-op +def my_convert_load(string): + return string + + +def parse(cont, indent=1): + if type(cont) is dict: + return ( + b"<<\n" + + b"\n".join( + [ + 4 * indent * b" " + k + b" " + parse(v, indent + 1) + for k, v in sorted(cont.items()) + ] + ) + + b"\n" + + 4 * (indent - 1) * b" " + + b">>" + ) + elif type(cont) is int: + return str(cont).encode() + elif type(cont) is float: + if int(cont) == cont: + return parse(int(cont)) + else: + return ("%0.4f" % cont).rstrip("0").encode() + elif isinstance(cont, MyPdfDict): + # if cont got an identifier, then addobj() has been called with it + # and a link to it will be added, otherwise add it inline + if hasattr(cont, "identifier"): + return ("%d 0 R" % cont.identifier).encode() + else: + return parse(cont.content, indent) + elif type(cont) is str or isinstance(cont, bytes): + if type(cont) is str and type(cont) is not bytes: + raise TypeError( + "parse must be passed a bytes object in py3. 
Got: %s" % cont + ) + return cont + elif isinstance(cont, list): + return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]" + else: + raise TypeError("cannot handle type %s with content %s" % (type(cont), cont)) + + +class MyPdfDict(object): + def __init__(self, *args, **kw): + self.content = dict() + if args: + if len(args) == 1: + args = args[0] + self.content.update(args) + self.stream = None + for key, value in kw.items(): + if key == "stream": + self.stream = value + self.content[MyPdfName.Length] = len(value) + elif key == "indirect": + pass + else: + self.content[getattr(MyPdfName, key)] = value + + def tostring(self): + if self.stream is not None: + return ( + ("%d 0 obj\n" % self.identifier).encode() + + parse(self.content) + + b"\nstream\n" + + self.stream + + b"\nendstream\nendobj\n" + ) + else: + return ( + ("%d 0 obj\n" % self.identifier).encode() + + parse(self.content) + + b"\nendobj\n" + ) + + def __setitem__(self, key, value): + self.content[key] = value + + def __getitem__(self, key): + return self.content[key] + + +class MyPdfName: + def __getattr__(self, name): + return b"/" + name.encode("ascii") + + +MyPdfName = MyPdfName() + + +class MyPdfObject(bytes): + def __new__(cls, string): + return bytes.__new__(cls, string.encode("ascii")) + + +class MyPdfArray(list): + pass + + +class MyPdfWriter: + def __init__(self, version="1.3"): + self.objects = [] + # create an incomplete pages object so that a /Parent entry can be + # added to each page + self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0) + self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog) + self.version = version # default pdf version 1.3 + self.pagearray = [] + + def addobj(self, obj): + newid = len(self.objects) + 1 + obj.identifier = newid + self.objects.append(obj) + + def tostream(self, info, stream): + xreftable = list() + + # justification of the random binary garbage in the header from + # adobe: + # + # > Note: If a PDF file contains binary 
data, as most do (see Section + # > 3.1, “Lexical Conventions”), it is recommended that the header + # > line be immediately followed by a comment line containing at + # > least four binary characters—that is, characters whose codes are + # > 128 or greater. This ensures proper behavior of file transfer + # > applications that inspect data near the beginning of a file to + # > determine whether to treat the file’s contents as text or as + # > binary. + # + # the choice of binary characters is arbitrary but those four seem to + # be used elsewhere. + pdfheader = ("%%PDF-%s\n" % self.version).encode("ascii") + pdfheader += b"%\xe2\xe3\xcf\xd3\n" + stream.write(pdfheader) + + # From section 3.4.3 of the PDF Reference (version 1.7): + # + # > Each entry is exactly 20 bytes long, including the end-of-line + # > marker. + # > + # > [...] + # > + # > The format of an in-use entry is + # > nnnnnnnnnn ggggg n eol + # > where + # > nnnnnnnnnn is a 10-digit byte offset + # > ggggg is a 5-digit generation number + # > n is a literal keyword identifying this as an in-use entry + # > eol is a 2-character end-of-line sequence + # > + # > [...] 
+ # > + # > If the file’s end-of-line marker is a single character (either a + # > carriage return or a line feed), it is preceded by a single space; + # + # Since we chose to use a single character eol marker, we precede it by + # a space + pos = len(pdfheader) + xreftable.append(b"0000000000 65535 f \n") + for o in self.objects: + xreftable.append(("%010d 00000 n \n" % pos).encode()) + content = o.tostring() + stream.write(content) + pos += len(content) + + xrefoffset = pos + stream.write(b"xref\n") + stream.write(("0 %d\n" % len(xreftable)).encode()) + for x in xreftable: + stream.write(x) + stream.write(b"trailer\n") + stream.write( + parse({b"/Size": len(xreftable), b"/Info": info, b"/Root": self.catalog}) + + b"\n" + ) + stream.write(b"startxref\n") + stream.write(("%d\n" % xrefoffset).encode()) + stream.write(b"%%EOF\n") + return + + def addpage(self, page): + page[b"/Parent"] = self.pages + self.pagearray.append(page) + self.pages.content[b"/Kids"].append(page) + self.pages.content[b"/Count"] += 1 + self.addobj(page) + + +if PY3: + + class MyPdfString: + @classmethod + def encode(cls, string, hextype=False): + if hextype: + return ( + b"< " + + b" ".join(("%06x" % c).encode("ascii") for c in string) + + b" >" + ) + else: + try: + string = string.encode("ascii") + except UnicodeEncodeError: + string = b"\xfe\xff" + string.encode("utf-16-be") + # We should probably encode more here because at least + # ghostscript interpretes a carriage return byte (0x0D) as a + # new line byte (0x0A) + # PDF supports: \n, \r, \t, \b and \f + string = string.replace(b"\\", b"\\\\") + string = string.replace(b"(", b"\\(") + string = string.replace(b")", b"\\)") + return b"(" + string + b")" + + +else: + + class MyPdfString(object): + @classmethod + def encode(cls, string, hextype=False): + if hextype: + return ( + b"< " + + b" ".join(("%06x" % c).encode("ascii") for c in string) + + b" >" + ) + else: + # This mimics exactely to what pdfrw does. 
+ string = string.replace(b"\\", b"\\\\") + string = string.replace(b"(", b"\\(") + string = string.replace(b")", b"\\)") + return b"(" + string + b")" + + +class pdfdoc(object): + def __init__( + self, + version="1.3", + title=None, + author=None, + creator=None, + producer=None, + creationdate=None, + moddate=None, + subject=None, + keywords=None, + nodate=False, + panes=None, + initial_page=None, + magnification=None, + page_layout=None, + fit_window=False, + center_window=False, + fullscreen=False, + with_pdfrw=True, + ): + if with_pdfrw: + try: + from pdfrw import PdfWriter, PdfDict, PdfName, PdfString + + self.with_pdfrw = True + except ImportError: + PdfWriter = MyPdfWriter + PdfDict = MyPdfDict + PdfName = MyPdfName + PdfString = MyPdfString + self.with_pdfrw = False + else: + PdfWriter = MyPdfWriter + PdfDict = MyPdfDict + PdfName = MyPdfName + PdfString = MyPdfString + self.with_pdfrw = False + + now = datetime.now() + self.info = PdfDict(indirect=True) + + def datetime_to_pdfdate(dt): + return dt.strftime("%Y%m%d%H%M%SZ") + + if title is not None: + self.info[PdfName.Title] = PdfString.encode(title) + if author is not None: + self.info[PdfName.Author] = PdfString.encode(author) + if creator is not None: + self.info[PdfName.Creator] = PdfString.encode(creator) + if producer is not None and producer != "": + self.info[PdfName.Producer] = PdfString.encode(producer) + if creationdate is not None: + self.info[PdfName.CreationDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(creationdate) + ) + elif not nodate: + self.info[PdfName.CreationDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(now) + ) + if moddate is not None: + self.info[PdfName.ModDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(moddate) + ) + elif not nodate: + self.info[PdfName.ModDate] = PdfString.encode( + "D:" + datetime_to_pdfdate(now) + ) + if subject is not None: + self.info[PdfName.Subject] = PdfString.encode(subject) + if keywords is not None: + 
self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords)) + + self.writer = PdfWriter() + self.writer.version = version + # this is done because pdfrw adds info, catalog and pages as the first + # three objects in this order + if not self.with_pdfrw: + self.writer.addobj(self.info) + self.writer.addobj(self.writer.catalog) + self.writer.addobj(self.writer.pages) + + self.panes = panes + self.initial_page = initial_page + self.magnification = magnification + self.page_layout = page_layout + self.fit_window = fit_window + self.center_window = center_window + self.fullscreen = fullscreen + + def add_imagepage( + self, + color, + imgwidthpx, + imgheightpx, + imgformat, + imgdata, + imgwidthpdf, + imgheightpdf, + imgxpdf, + imgypdf, + pagewidth, + pageheight, + userunit=None, + palette=None, + inverted=False, + depth=0, + rotate=0, + ): + if self.with_pdfrw: + from pdfrw import PdfDict, PdfName, PdfObject, PdfString + from pdfrw.py23_diffs import convert_load + else: + PdfDict = MyPdfDict + PdfName = MyPdfName + PdfObject = MyPdfObject + PdfString = MyPdfString + convert_load = my_convert_load + + if color == Colorspace["1"] or color == Colorspace.L: + colorspace = PdfName.DeviceGray + elif color == Colorspace.RGB: + colorspace = PdfName.DeviceRGB + elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: + colorspace = PdfName.DeviceCMYK + elif color == Colorspace.P: + if self.with_pdfrw: + raise Exception( + "pdfrw does not support hex strings for " + "palette image input, re-run with " + "--without-pdfrw" + ) + colorspace = [ + PdfName.Indexed, + PdfName.DeviceRGB, + len(palette) - 1, + PdfString.encode(palette, hextype=True), + ] + else: + raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) + + # either embed the whole jpeg or deflate the bitmap representation + if imgformat is ImageFormat.JPEG: + ofilter = PdfName.DCTDecode + elif imgformat is ImageFormat.JPEG2000: + ofilter = PdfName.JPXDecode + self.writer.version = "1.5" 
# jpeg2000 needs pdf 1.5 + elif imgformat is ImageFormat.CCITTGroup4: + ofilter = [PdfName.CCITTFaxDecode] + else: + ofilter = PdfName.FlateDecode + + image = PdfDict(stream=convert_load(imgdata)) + + image[PdfName.Type] = PdfName.XObject + image[PdfName.Subtype] = PdfName.Image + image[PdfName.Filter] = ofilter + image[PdfName.Width] = imgwidthpx + image[PdfName.Height] = imgheightpx + image[PdfName.ColorSpace] = colorspace + image[PdfName.BitsPerComponent] = depth + + if color == Colorspace["CMYK;I"]: + # Inverts all four channels + image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0] + + if imgformat is ImageFormat.CCITTGroup4: + decodeparms = PdfDict() + # The default for the K parameter is 0 which indicates Group 3 1-D + # encoding. We set it to -1 because we want Group 4 encoding. + decodeparms[PdfName.K] = -1 + if inverted: + decodeparms[PdfName.BlackIs1] = PdfObject("false") + else: + decodeparms[PdfName.BlackIs1] = PdfObject("true") + decodeparms[PdfName.Columns] = imgwidthpx + decodeparms[PdfName.Rows] = imgheightpx + image[PdfName.DecodeParms] = [decodeparms] + elif imgformat is ImageFormat.PNG: + decodeparms = PdfDict() + decodeparms[PdfName.Predictor] = 15 + if color in [Colorspace.P, Colorspace["1"], Colorspace.L]: + decodeparms[PdfName.Colors] = 1 + else: + decodeparms[PdfName.Colors] = 3 + decodeparms[PdfName.Columns] = imgwidthpx + decodeparms[PdfName.BitsPerComponent] = depth + image[PdfName.DecodeParms] = decodeparms + + text = ( + "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" + % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf) + ).encode("ascii") + + content = PdfDict(stream=convert_load(text)) + resources = PdfDict(XObject=PdfDict(Im0=image)) + + page = PdfDict(indirect=True) + page[PdfName.Type] = PdfName.Page + page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight] + page[PdfName.Resources] = resources + page[PdfName.Contents] = content + if rotate != 0: + page[PdfName.Rotate] = rotate + if userunit is not None: + # /UserUnit 
requires PDF 1.6 + if self.writer.version < "1.6": + self.writer.version = "1.6" + page[PdfName.UserUnit] = userunit + + self.writer.addpage(page) + + if not self.with_pdfrw: + self.writer.addobj(content) + self.writer.addobj(image) + + def tostring(self): + stream = BytesIO() + self.tostream(stream) + return stream.getvalue() + + def tostream(self, outputstream): + if self.with_pdfrw: + from pdfrw import PdfDict, PdfName, PdfArray, PdfObject + else: + PdfDict = MyPdfDict + PdfName = MyPdfName + PdfObject = MyPdfObject + PdfArray = MyPdfArray + NullObject = PdfObject("null") + TrueObject = PdfObject("true") + + # We fill the catalog with more information like /ViewerPreferences, + # /PageMode, /PageLayout or /OpenAction because the latter refers to a + # page object which has to be present so that we can get its id. + # + # Furthermore, if using pdfrw, the trailer is cleared every time a page + # is added, so we can only start using it after all pages have been + # written. + + if self.with_pdfrw: + catalog = self.writer.trailer.Root + else: + catalog = self.writer.catalog + + if ( + self.fullscreen + or self.fit_window + or self.center_window + or self.panes is not None + ): + catalog[PdfName.ViewerPreferences] = PdfDict() + + if self.fullscreen: + # this setting might be overwritten later by the page mode + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseNone + + if self.panes == PageMode.thumbs: + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseThumbs + # this setting might be overwritten later if fullscreen + catalog[PdfName.PageMode] = PdfName.UseThumbs + elif self.panes == PageMode.outlines: + catalog[PdfName.ViewerPreferences][ + PdfName.NonFullScreenPageMode + ] = PdfName.UseOutlines + # this setting might be overwritten later if fullscreen + catalog[PdfName.PageMode] = PdfName.UseOutlines + elif self.panes in [PageMode.none, None]: + pass + else: + raise ValueError("unknown page 
mode: %s" % self.panes) + + if self.fit_window: + catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject + + if self.center_window: + catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = TrueObject + + if self.fullscreen: + catalog[PdfName.PageMode] = PdfName.FullScreen + + # see table 8.2 in section 8.2.1 in + # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf + # Fit - Fits the page to the window. + # FitH - Fits the width of the page to the window. + # FitV - Fits the height of the page to the window. + # FitR - Fits the rectangle specified by the four coordinates to the + # window. + # FitB - Fits the page bounding box to the window. This basically + # reduces the amount of whitespace (margins) that is displayed + # and thus focussing more on the text content. + # FitBH - Fits the width of the page bounding box to the window. + # FitBV - Fits the height of the page bounding box to the window. + + # by default the initial page is the first one + initial_page = self.writer.pagearray[0] + # we set the open action here to make sure we open on the requested + # initial page but this value might be overwritten by a custom open + # action later while still taking the requested initial page into + # account + if self.initial_page is not None: + initial_page = self.writer.pagearray[self.initial_page - 1] + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.XYZ, NullObject, NullObject, 0] + ) + + if self.magnification == Magnification.fit: + catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit]) + elif self.magnification == Magnification.fith: + pagewidth = initial_page[PdfName.MediaBox][2] + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, PdfName.FitH, pagewidth] + ) + elif self.magnification == Magnification.fitbh: + # quick hack to determine the image width on the page + imgwidth = float(initial_page[PdfName.Contents].stream.split()[4]) + catalog[PdfName.OpenAction] = PdfArray( + [initial_page, 
def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
    """Collect colorspace, resolution, pixel size and rotation of an image.

    Arguments:
        imgdata: the opened PIL image, or None when PIL could not open the
            data and only ``rawdata`` is available (JPEG2000 fallback).
        imgformat: the ImageFormat member detected for the input.
        default_dpi: resolution used whenever the image does not carry one.
        colorspace: if truthy, this colorspace is used instead of detecting
            it from the image.
        rawdata: the raw bytes of the image file (optional).

    Returns:
        A tuple ``(color, ndpi, imgwidthpx, imgheightpx, rotation)`` where
        ``ndpi`` is a ``(horizontal, vertical)`` pair and ``rotation`` is one
        of 0, 90, 180 or 270.

    Raises:
        Exception: if the image has an alpha channel / transparency or an
            unsupported or invalid EXIF orientation.
        ValueError: if the colorspace cannot be determined.
    """
    if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
        # this codepath gets called if the PIL installation is not able to
        # handle JPEG2000 files
        imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)

        if hdpi is None:
            hdpi = default_dpi
        if vdpi is None:
            vdpi = default_dpi
        ndpi = (hdpi, vdpi)
    else:
        imgwidthpx, imgheightpx = imgdata.size

        ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
        # In python3, the returned dpi value for some tiff images will
        # not be an integer but a float. To make the behaviour of
        # img2pdf the same between python2 and python3, we convert that
        # float into an integer by rounding.
        # Search online for the 72.009 dpi problem for more info.
        ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
        ics = imgdata.mode

    # imgdata is None on the JPEG2000 fallback path, so only consult its info
    # dictionary when there actually is a PIL image.
    if ics in ["LA", "PA", "RGBA"] or (
        imgdata is not None and "transparency" in imgdata.info
    ):
        logging.warning(
            "Image contains transparency which cannot be retained " "in PDF."
        )
        logging.warning("img2pdf will not perform a lossy operation.")
        logging.warning("You can remove the alpha channel using imagemagick:")
        logging.warning(
            " $ convert input.png -background white -alpha "
            "remove -alpha off output.png"
        )
        raise Exception("Refusing to work on images with alpha channel")

    # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
    # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
    # header. In that case it can happen that the horizontal and vertical DPI
    # are set to zero.
    if ndpi == (0, 0):
        ndpi = (default_dpi, default_dpi)

    # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi.
    # In that case, we want to use a different default.
    if ndpi == (1, 1) and imgformat == ImageFormat.TIFF:
        ndpi = (
            imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi),
            imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi),
        )

    logging.debug("input dpi = %d x %d", *ndpi)

    rotation = 0
    # query the EXIF data only once instead of once for the check and once
    # for the iteration
    exif = imgdata._getexif() if hasattr(imgdata, "_getexif") else None
    if exif is not None:
        # best-effort name for error messages; falls back to a placeholder
        # when the image object carries no (or an empty) filename attribute
        imgname = getattr(imgdata, "filename", "") or "<unknown>"
        # Detailed information on EXIF rotation tags:
        # http://impulseadventure.com/photo/exif-orientation.html
        orientation_to_rotation = {1: 0, 6: 90, 3: 180, 8: 270}
        for tag, value in exif.items():
            if TAGS.get(tag, tag) != "Orientation":
                continue
            if value in orientation_to_rotation:
                rotation = orientation_to_rotation[value]
            elif value in (2, 4, 5, 7):
                raise Exception(
                    'Image "%s": Unsupported flipped '
                    "rotation mode (%d)" % (imgname, value)
                )
            else:
                raise Exception(
                    'Image "%s": invalid rotation (%d)' % (imgname, value)
                )

    logging.debug("rotation = %d°", rotation)

    if colorspace:
        color = colorspace
        logging.debug("input colorspace (forced) = %s", color)
    else:
        color = None
        for c in Colorspace:
            if c.name == ics:
                color = c
        if color is None:
            # PIL does not provide the information about the original
            # colorspace for 16bit grayscale PNG images. Thus, we retrieve
            # that info manually by looking at byte 10 in the IHDR chunk. We
            # know where to find that in the file because the IHDR chunk must
            # be the first chunk.
            # A one-byte slice is compared instead of an index so that the
            # test behaves the same for python2 str and python3 bytes and
            # does not fail with an IndexError on truncated data.
            if (
                rawdata is not None
                and imgformat == ImageFormat.PNG
                and rawdata[25:26] == b"\x00"
            ):
                color = Colorspace.L
            else:
                raise ValueError("unknown colorspace")
        if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
            # Adobe inverts CMYK JPEGs for some reason, and others
            # have followed suit as well. Some software assumes the
            # JPEG is inverted if the Adobe tag (APP14), while other
            # software assumes all CMYK JPEGs are inverted. I don't
            # have enough experience with these to know which is
            # better for images currently in the wild, so I'm going
            # with the first approach for now.
            if "adobe" in imgdata.info:
                color = Colorspace["CMYK;I"]
        logging.debug("input colorspace = %s", color.name)

    logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)

    return (color, ndpi, imgwidthpx, imgheightpx, rotation)


def ccitt_payload_location_from_pil(img):
    """Return ``(offset, length)`` of the single CCITT Group 4 strip of img.

    Raises:
        ValueError: if the image is not compressed with CCITT Group 4.
        NotImplementedError: if the image consists of more than one strip.
    """
    # If Pillow is passed an invalid compression argument it will ignore it;
    # make sure the image actually got compressed.
    if img.info["compression"] != "group4":
        raise ValueError(
            "Image not compressed with CCITT Group 4 but with: %s"
            % img.info["compression"]
        )

    # Read the TIFF tags to find the offset(s) of the compressed data strips.
    strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS]
    strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]

    # PIL always seems to create a single strip even for very large TIFFs when
    # it saves images, so assume we only have to read a single strip.
    # A test ~10 GPixel image was still encoded as a single strip. Just to be
    # safe, throw an error if there is more than one offset.
    if len(strip_offsets) != 1 or len(strip_bytes) != 1:
        raise NotImplementedError("Transcoding multiple strips not supported")

    (offset,), (length,) = strip_offsets, strip_bytes

    logging.debug("TIFF strip_offsets: %d", offset)
    logging.debug("TIFF strip_bytes: %d", length)

    return offset, length


def transcode_monochrome(imgdata):
    """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""

    logging.debug("Converting monochrome to CCITT Group4")

    # Convert the image to Group 4 in memory. If libtiff is not installed and
    # Pillow is not compiled against it, .save() will raise an exception.
    newimgio = BytesIO()

    # we create a whole new PIL image or otherwise it might happen with some
    # input images, that libtiff fails an assert and the whole process is
    # killed by a SIGABRT:
    # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46
    im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes())
    im.save(newimgio, format="TIFF", compression="group4")

    # Open new image in memory
    newimgio.seek(0)
    newimg = Image.open(newimgio)

    offset, length = ccitt_payload_location_from_pil(newimg)

    newimgio.seek(offset)
    return newimgio.read(length)


def parse_png(rawdata):
    """Extract the concatenated IDAT payload and the palette from a PNG.

    Arguments:
        rawdata: the bytes of a complete PNG file.

    Returns:
        A tuple ``(pngidat, palette)``: the data of all IDAT chunks joined in
        file order and a list with one integer (0xRRGGBB) per PLTE entry.

    Raises:
        Exception: if a chunk claims more data than the file contains or if
            the PLTE chunk length is not a multiple of three.
    """
    idat_parts = []
    palette = []
    total = len(rawdata)
    # i always points at the first data byte of the current chunk: 8 bytes of
    # signature followed by 4 bytes chunk length and 4 bytes chunk type
    i = 16
    while i < total:
        # once we can require Python >= 3.2 we can use int.from_bytes() instead
        (n,) = struct.unpack(">I", rawdata[i - 8 : i - 4])
        if i + n > total:
            raise Exception("invalid png: %d %d %d" % (i, n, total))
        chunktype = rawdata[i - 4 : i]
        if chunktype == b"IDAT":
            # collect the pieces and join once at the end instead of
            # repeatedly copying a growing bytes object
            idat_parts.append(rawdata[i : i + n])
        elif chunktype == b"PLTE":
            # This could be as simple as saying "palette = rawdata[i:i+n]" but
            # pdfrw does only escape parenthesis and backslashes in the raw
            # byte stream. But raw carriage return bytes are interpreted as
            # line feed bytes by ghostscript. So instead we use the hex string
            # format. pdfrw cannot write it but at least ghostscript is happy
            # with it. We would also write out the palette in binary format
            # (and escape more bytes) but since we cannot use pdfrw anyways,
            # we choose the more human readable variant.
            # See https://github.com/pmaupin/pdfrw/issues/147
            if n % 3 != 0:
                # otherwise the last entry would be completed with bytes of
                # the chunk checksum
                raise Exception(
                    "invalid png: PLTE chunk length %d is not a multiple of 3" % n
                )
            for j in range(i, i + n, 3):
                # with int.from_bytes() we would not have to prepend extra
                # zeroes
                (color,) = struct.unpack(">I", b"\x00" + rawdata[j : j + 3])
                palette.append(color)
        # skip the chunk data, its checksum and the next chunk's length and
        # type fields
        i += n + 12
    return b"".join(idat_parts), palette
Thus, we retrieve that info manually by looking at byte 13 in the + # IHDR chunk. We know where to find that in the file because the IHDR chunk + # must be the first chunk. + if imgformat == ImageFormat.PNG and rawdata[28] == 0: + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata + ) + pngidat, palette = parse_png(rawdata) + im.close() + # PIL does not provide the information about the original bits per + # sample. Thus, we retrieve that info manually by looking at byte 9 in + # the IHDR chunk. We know where to find that in the file because the + # IHDR chunk must be the first chunk + depth = rawdata[24] + if depth not in [1, 2, 4, 8, 16]: + raise ValueError("invalid bit depth: %d" % depth) + logging.debug("read_images() embeds a PNG") + return [ + ( + color, + ndpi, + imgformat, + pngidat, + imgwidthpx, + imgheightpx, + palette, + False, + depth, + rotation, + ) + ] + + # If our input is not JPEG or PNG, then we might have a format that + # supports multiple frames (like TIFF or GIF), so we need a loop to + # iterate through all frames of the image. + # + # Each frame gets compressed using PNG compression *except* if: + # + # * The image is monochrome => encode using CCITT group 4 + # + # * The image is CMYK => zip plain RGB data + # + # * We are handling a CCITT encoded TIFF frame => embed data + + result = [] + img_page_count = 0 + # loop through all frames of the image (example: multipage TIFF) + while True: + try: + imgdata.seek(img_page_count) + except EOFError: + break + + if first_frame_only and img_page_count > 0: + break + + # PIL is unable to preserve the data of 16-bit RGB TIFF files and will + # convert it to 8-bit without the possibility to retrieve the original + # data + # https://github.com/python-pillow/Pillow/issues/1888 + # + # Some tiff images do not have BITSPERSAMPLE set. 
Use this to create + # such a tiff: tiffset -u 258 test.tif + if ( + imgformat == ImageFormat.TIFF + and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8 + ): + raise ValueError("PIL is unable to preserve more than 8 bits per sample") + + # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it + # only contains a single strip + if ( + imgformat == ImageFormat.TIFF + and imgdata.info["compression"] == "group4" + and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1 + ): + photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] + inverted = False + if photo == 0: + inverted = True + elif photo != 1: + raise ValueError( + "unsupported photometric interpretation for " + "group4 tiff: %d" % photo + ) + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata + ) + offset, length = ccitt_payload_location_from_pil(imgdata) + im.seek(offset) + rawdata = im.read(length) + fillorder = imgdata.tag_v2.get(TiffImagePlugin.FILLORDER) + if fillorder is None: + # no FillOrder: nothing to do + pass + elif fillorder == 1: + # msb-to-lsb: nothing to do + pass + elif fillorder == 2: + logging.debug("fillorder is lsb-to-msb => reverse bits") + # lsb-to-msb: reverse bits of each byte + rawdata = bytearray(rawdata) + for i in range(len(rawdata)): + rawdata[i] = TIFFBitRevTable[rawdata[i]] + rawdata = bytes(rawdata) + else: + raise ValueError("unsupported FillOrder: %d" % fillorder) + logging.debug("read_images() embeds Group4 from TIFF") + result.append( + ( + color, + ndpi, + ImageFormat.CCITTGroup4, + rawdata, + imgwidthpx, + imgheightpx, + [], + inverted, + 1, + rotation, + ) + ) + img_page_count += 1 + continue + + logging.debug("Converting frame: %d" % img_page_count) + + color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace + ) + + newimg = None + if color == Colorspace["1"]: + try: + ccittdata = 
transcode_monochrome(imgdata) + logging.debug("read_images() encoded a B/W image as CCITT group 4") + result.append( + ( + color, + ndpi, + ImageFormat.CCITTGroup4, + ccittdata, + imgwidthpx, + imgheightpx, + [], + False, + 1, + rotation, + ) + ) + img_page_count += 1 + continue + except Exception as e: + logging.debug(e) + logging.debug("Converting colorspace 1 to L") + newimg = imgdata.convert("L") + color = Colorspace.L + elif color in [ + Colorspace.RGB, + Colorspace.L, + Colorspace.CMYK, + Colorspace["CMYK;I"], + Colorspace.P, + ]: + logging.debug("Colorspace is OK: %s", color) + newimg = imgdata + else: + raise ValueError("unknown or unsupported colorspace: %s" % color.name) + # the PNG format does not support CMYK, so we fall back to normal + # compression + if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: + imggz = zlib.compress(newimg.tobytes()) + logging.debug("read_images() encoded CMYK with flate compression") + result.append( + ( + color, + ndpi, + imgformat, + imggz, + imgwidthpx, + imgheightpx, + [], + False, + 8, + rotation, + ) + ) + else: + # cheapo version to retrieve a PNG encoding of the payload is to + # just save it with PIL. In the future this could be replaced by + # dedicated function applying the Paeth PNG filter to the raw pixel + pngbuffer = BytesIO() + newimg.save(pngbuffer, format="png") + pngidat, palette = parse_png(pngbuffer.getvalue()) + # PIL does not provide the information about the original bits per + # sample. Thus, we retrieve that info manually by looking at byte 9 in + # the IHDR chunk. 
def px_to_pt(length, dpi):
    """Convert a length in pixels to PDF units (1/72 of an inch)."""
    scaled = 72.0 * length
    return scaled / dpi


def cm_to_pt(length):
    """Convert a length in centimeters to PDF units."""
    scaled = 72.0 * length
    return scaled / 2.54


def mm_to_pt(length):
    """Convert a length in millimeters to PDF units."""
    scaled = 72.0 * length
    return scaled / 25.4


def in_to_pt(length):
    """Convert a length in inches to PDF units."""
    return 72.0 * length


def get_layout_fun(
    pagesize=None, imgsize=None, border=None, fit=None, auto_orient=False
):
    """Build a layout function from the given page, image and border sizes.

    The returned function takes ``(imgwidthpx, imgheightpx, ndpi)`` and
    returns ``(pagewidth, pageheight, imgwidthpdf, imgheightpdf)``.
    ``border`` is indexed as ``border[0]`` for the vertical and ``border[1]``
    for the horizontal direction.
    """

    def both_negative(width, height):
        return width is not None and width < 0 and height is not None and height < 0

    def any_negative(width, height):
        return (width is not None and width < 0) or (
            height is not None and height < 0
        )

    def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
        if fitwidth is None and fitheight is None:
            raise ValueError("fitwidth and fitheight cannot both be None")
        # for fill and enlarge it is okay if one of the dimensions is
        # negative as long as the other one is not; every other mode needs
        # both dimensions to be non-negative
        lenient = fit in [FitMode.fill, FitMode.enlarge]
        if lenient and both_negative(fitwidth, fitheight):
            raise ValueError(
                "cannot fit into a rectangle where both dimensions are negative"
            )
        elif not lenient and any_negative(fitwidth, fitheight):
            raise Exception(
                "cannot fit into a rectangle where either dimensions are negative"
            )

        def to_width():
            # take the requested width, derive the height from the aspect
            return fitwidth, (fitwidth * imgheight) / imgwidth

        def to_height():
            # take the requested height, derive the width from the aspect
            return (fitheight * imgwidth) / imgheight, fitheight

        def scale(wrong_height):
            # with only one dimension given that dimension decides; with both
            # given the width is tried first and the height takes over if the
            # resulting height is rejected by wrong_height()
            if fitwidth is None:
                return to_height()
            if fitheight is None:
                return to_width()
            width, height = to_width()
            if wrong_height(height):
                return to_height()
            return width, height

        def into():
            return scale(lambda height: height > fitheight)

        if fit is None or fit == FitMode.into:
            return into()
        if fit == FitMode.fill:
            return scale(lambda height: height < fitheight)
        if fit == FitMode.exact:
            if fitwidth is not None and fitheight is not None:
                return fitwidth, fitheight
            if fitwidth is None:
                return to_height()
            return to_width()
        if fit == FitMode.shrink:
            already_small = (fitwidth is None or imgwidth <= fitwidth) and (
                fitheight is None or imgheight <= fitheight
            )
            if already_small:
                return imgwidth, imgheight
            return into()
        if fit == FitMode.enlarge:
            already_large = (fitwidth is not None and imgwidth > fitwidth) or (
                fitheight is not None and imgheight > fitheight
            )
            if already_large:
                return imgwidth, imgheight
            return into()
        raise NotImplementedError

    def wants_swap(imgwidthpx, imgheightpx):
        # the page is transposed if auto orientation is requested and the
        # orientation of the image differs from the one of the page
        return (
            pagesize[0] is not None
            and pagesize[1] is not None
            and auto_orient
            and (
                (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1])
                or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])
            )
        )

    def scale_imgsize(s, px, dpi):
        if s is None:
            return None
        mode, value = s
        if mode == ImgSize.abs:
            return value
        if mode == ImgSize.perc:
            return (px_to_pt(px, dpi) * value) / 100
        if mode == ImgSize.dpi:
            return px_to_pt(px, value)
        raise NotImplementedError

    def fit_to_imgsize(imgwidthpx, imgheightpx, ndpi):
        return fitfun(
            fit,
            px_to_pt(imgwidthpx, ndpi[0]),
            px_to_pt(imgheightpx, ndpi[1]),
            scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
            scale_imgsize(imgsize[1], imgheightpx, ndpi[1]),
        )

    no_page = pagesize is None
    no_img = imgsize is None

    # if no layout arguments are given, then the image size is equal to the
    # page size and will be drawn with the default dpi
    if no_page and no_img and border is None:
        return default_layout_fun

    if no_page and no_img:
        # only a border was given: the page is the image plus the border

        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
            imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
            return (
                imgwidthpdf + 2 * border[1],
                imgheightpdf + 2 * border[0],
                imgwidthpdf,
                imgheightpdf,
            )

        return layout_fun

    if border is None:
        border = (0, 0)

    # if the pagesize is given but the imagesize is not, then the imagesize
    # will be calculated from the pagesize, taking into account the border
    # and the fitting
    if not no_page and no_img:

        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            if wants_swap(imgwidthpx, imgheightpx):
                pagewidth, pageheight = pagesize[1], pagesize[0]
                newborder = border[1], border[0]
            else:
                pagewidth, pageheight = pagesize[0], pagesize[1]
                newborder = border
            fitwidth = None if pagewidth is None else pagewidth - 2 * newborder[1]
            fitheight = None if pageheight is None else pageheight - 2 * newborder[0]
            lenient = fit in [FitMode.fill, FitMode.enlarge]
            if lenient and both_negative(fitwidth, fitheight):
                raise NegativeDimensionError(
                    "at least one border dimension musts be smaller than half "
                    "the respective page dimension"
                )
            elif not lenient and any_negative(fitwidth, fitheight):
                raise NegativeDimensionError(
                    "one border dimension is larger than half of the "
                    "respective page dimension"
                )
            imgwidthpdf, imgheightpdf = fitfun(
                fit,
                px_to_pt(imgwidthpx, ndpi[0]),
                px_to_pt(imgheightpx, ndpi[1]),
                fitwidth,
                fitheight,
            )
            if pagewidth is None:
                pagewidth = imgwidthpdf + border[1] * 2
            if pageheight is None:
                pageheight = imgheightpdf + border[0] * 2
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf

        return layout_fun

    if no_page and not no_img:
        # only the image size is known: the page is the image plus the border

        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            imgwidthpdf, imgheightpdf = fit_to_imgsize(imgwidthpx, imgheightpx, ndpi)
            return (
                imgwidthpdf + 2 * border[1],
                imgheightpdf + 2 * border[0],
                imgwidthpdf,
                imgheightpdf,
            )

        return layout_fun

    if not no_page and not no_img:
        # page and image size are both given explicitly

        def layout_fun(imgwidthpx, imgheightpx, ndpi):
            if wants_swap(imgwidthpx, imgheightpx):
                pagewidth, pageheight = pagesize[1], pagesize[0]
            else:
                pagewidth, pageheight = pagesize[0], pagesize[1]
            imgwidthpdf, imgheightpdf = fit_to_imgsize(imgwidthpx, imgheightpx, ndpi)
            return pagewidth, pageheight, imgwidthpdf, imgheightpdf

        return layout_fun

    raise NotImplementedError


def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
    """Layout in which the page has exactly the size of the image."""
    width = px_to_pt(imgwidthpx, ndpi[0])
    height = px_to_pt(imgheightpx, ndpi[1])
    return width, height, width, height


def get_fixed_dpi_layout_fun(fixed_dpi):
    """Return a layout function that ignores the DPI claimed by the image.

    The returned function behaves like default_layout_fun but always uses
    ``fixed_dpi`` (a pair of horizontal and vertical dpi):

    >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
    >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
    """

    def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
        # ndpi is accepted for interface compatibility but not used
        return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)

    return fixed_dpi_layout_fun


def find_scale(pagewidth, pageheight):
    """Return the power of 10 (10, 100, 1000...) by which the larger page
    dimension has to be divided to get to or below the PDF specification
    limit of 14400 PDF units (=200 inches)"""
    import math

    longest = max(pagewidth, pageheight)
    exponent = math.ceil(math.log10(longest / 14400.0))
    return 10 ** exponent
def _read_image_bytes(img):
    """Return the raw image bytes for one input of convert().

    img may be a file-like object (anything implementing read()), a file
    name, or the raw image content itself.
    """
    try:
        return img.read()
    except AttributeError:
        if not isinstance(img, (str, bytes)):
            raise TypeError("Neither implements read() nor is str or bytes")
        # the thing doesn't have a read() function, so try if we can treat
        # it as a file name
        try:
            with open(img, "rb") as handle:
                return handle.read()
        except Exception:
            # whatever the exception is (string could contain NUL
            # characters or the path could just not exist) it's not a file
            # name so we now treat it as raw image content
            return img


def convert(*images, **kwargs):
    """Convert one or more images into a PDF document.

    Input images can be given as file-like objects (they must implement
    read()), as binary strings holding the image content or as file names.
    If the keyword argument outputstream is given, the PDF is written to it
    and None is returned, otherwise the PDF is returned as a byte string.
    """
    defaults = {
        "title": None,
        "author": None,
        "creator": None,
        "producer": None,
        "creationdate": None,
        "moddate": None,
        "subject": None,
        "keywords": None,
        "colorspace": None,
        "nodate": False,
        "layout_fun": default_layout_fun,
        "viewer_panes": None,
        "viewer_initial_page": None,
        "viewer_magnification": None,
        "viewer_page_layout": None,
        "viewer_fit_window": False,
        "viewer_center_window": False,
        "viewer_fullscreen": False,
        "with_pdfrw": True,
        "outputstream": None,
        "first_frame_only": False,
        "allow_oversized": True,
    }
    for option, fallback in defaults.items():
        kwargs.setdefault(option, fallback)

    # options handed to pdfdoc, in the order of its positional parameters
    docoptions = (
        "title",
        "author",
        "creator",
        "producer",
        "creationdate",
        "moddate",
        "subject",
        "keywords",
        "nodate",
        "viewer_panes",
        "viewer_initial_page",
        "viewer_magnification",
        "viewer_page_layout",
        "viewer_fit_window",
        "viewer_center_window",
        "viewer_fullscreen",
        "with_pdfrw",
    )
    pdf = pdfdoc("1.3", *[kwargs[option] for option in docoptions])

    # backwards compatibility with older img2pdf versions where the first
    # argument to the function had to be given as a list: if only one
    # argument was given and it is a list, expand it
    if len(images) == 1 and isinstance(images[0], (list, tuple)):
        images = images[0]

    if not isinstance(images, (list, tuple)):
        images = [images]

    for img in images:
        rawdata = _read_image_bytes(img)
        frames = read_images(
            rawdata, kwargs["colorspace"], kwargs["first_frame_only"]
        )
        for frame in frames:
            (
                color,
                ndpi,
                imgformat,
                imgdata,
                imgwidthpx,
                imgheightpx,
                palette,
                inverted,
                depth,
                rotation,
            ) = frame
            pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
                imgwidthpx, imgheightpx, ndpi
            )

            userunit = None
            if pagewidth < 3.00 or pageheight < 3.00:
                logging.warning(
                    "pdf width or height is below 3.00 - too small for some viewers!"
                )
            elif pagewidth > 14400.0 or pageheight > 14400.0:
                if not kwargs["allow_oversized"]:
                    raise PdfTooLargeError(
                        "pdf width or height must not exceed 200 inches."
                    )
                userunit = find_scale(pagewidth, pageheight)
                pagewidth /= userunit
                pageheight /= userunit
                imgwidthpdf /= userunit
                imgheightpdf /= userunit
            # the image is always centered on the page
            imgxpdf = (pagewidth - imgwidthpdf) / 2.0
            imgypdf = (pageheight - imgheightpdf) / 2.0
            pdf.add_imagepage(
                color,
                imgwidthpx,
                imgheightpx,
                imgformat,
                imgdata,
                imgwidthpdf,
                imgheightpdf,
                imgxpdf,
                imgypdf,
                pagewidth,
                pageheight,
                userunit,
                palette,
                inverted,
                depth,
                rotation,
            )

    outputstream = kwargs["outputstream"]
    if not outputstream:
        return pdf.tostring()
    pdf.tostream(outputstream)
    return None


def parse_num(num, name):
    """Parse a length like "5", "2.5cm", "10mm", "1in" or "72pt" into PDF
    units. Returns None for the empty string; ``name`` is only used in error
    messages."""
    if num == "":
        return None
    # recognized suffixes and the name of the matching Unit member
    unit = None
    for suffix, member in (("pt", "pt"), ("cm", "cm"), ("mm", "mm"), ("in", "inch")):
        if num.endswith(suffix):
            unit = getattr(Unit, member)
            break
    if unit is None:
        # no unit given: the whole string must be a number in points
        try:
            value = float(num)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%s is not a floating point number and doesn't have a "
                "valid unit: %s" % (name, num)
            )
        unit = Unit.pt
    else:
        digits = num[:-2]
        try:
            value = float(digits)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%s is not a floating point number: %s" % (name, digits)
            )
    if unit == Unit.cm:
        return cm_to_pt(value)
    if unit == Unit.mm:
        return mm_to_pt(value)
    if unit == Unit.inch:
        return in_to_pt(value)
    return value


def parse_imgsize_num(num, name):
    """Parse an image size like "5cm", "300dpi" or "50%" into a tuple of an
    ImgSize member and a number. Returns None for the empty string; ``name``
    is only used in error messages."""
    if num == "":
        return None
    # recognized suffixes and the name of the matching ImgUnit member
    suffixes = (
        ("pt", "pt"),
        ("cm", "cm"),
        ("mm", "mm"),
        ("in", "inch"),
        ("dpi", "dpi"),
        ("%", "perc"),
    )
    unit = None
    for suffix, member in suffixes:
        if num.endswith(suffix):
            unit = getattr(ImgUnit, member)
            # strip off unit from string
            digits = num[: -len(suffix)]
            break
    if unit is None:
        # no unit given: the whole string must be a number in points
        try:
            value = float(num)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%s is not a floating point number and doesn't have a "
                "valid unit: %s" % (name, num)
            )
        unit = ImgUnit.pt
    else:
        try:
            value = float(digits)
        except ValueError:
            raise argparse.ArgumentTypeError(
                "%s is not a floating point number: %s" % (name, digits)
            )
    if unit == ImgUnit.cm:
        return (ImgSize.abs, cm_to_pt(value))
    if unit == ImgUnit.mm:
        return (ImgSize.abs, mm_to_pt(value))
    if unit == ImgUnit.inch:
        return (ImgSize.abs, in_to_pt(value))
    if unit == ImgUnit.pt:
        return (ImgSize.abs, value)
    if unit == ImgUnit.dpi:
        return (ImgSize.dpi, value)
    if unit == ImgUnit.perc:
        return (ImgSize.perc, value)
    return value


def _parse_rectarg(string, numparser):
    """Shared parser for "WxH" style arguments; numparser parses one side."""
    transposed = string.endswith("^T")
    if transposed:
        string = string[:-2]
    # a known paper size name is replaced by its dimensions
    if papersizes.get(string.lower()):
        string = papersizes[string.lower()]
    if "x" in string:
        first, second = string.split("x", 1)
        w = numparser(first, "width")
        h = numparser(second, "height")
    else:
        # if there is no separating "x" in the string, then the string is
        # interpreted as the width
        w = numparser(string, "width")
        h = None
    if transposed:
        w, h = h, w
    if w is None and h is None:
        raise argparse.ArgumentTypeError("at least one dimension must be specified")
    return w, h


def parse_pagesize_rectarg(string):
    """Parse a page size argument into a (width, height) tuple in PDF units;
    a trailing "^T" swaps width and height."""
    return _parse_rectarg(string, parse_num)


def parse_imgsize_rectarg(string):
    """Parse an image size argument into a (width, height) tuple of values as
    returned by parse_imgsize_num; a trailing "^T" swaps width and height."""
    return _parse_rectarg(string, parse_imgsize_num)


def parse_colorspacearg(string):
    """Return the Colorspace member whose name equals the given string."""
    matches = [c for c in Colorspace if c.name == string]
    if matches:
        return matches[0]
    allowed = ", ".join(c.name for c in Colorspace)
    raise argparse.ArgumentTypeError(
        "Unsupported colorspace: %s. Must be one of: %s." % (string, allowed)
    )


def parse_borderarg(string):
    """Parse a border argument of the form "X" or "H:V" into a tuple of the
    left/right and the top/bottom border in PDF units."""
    h, colon, v = string.partition(":")
    if colon:
        if h == "":
            raise argparse.ArgumentTypeError("missing value before colon")
        if v == "":
            raise argparse.ArgumentTypeError("missing value after colon")
    else:
        if string == "":
            raise argparse.ArgumentTypeError("border option cannot be empty")
        # a single value is used for both directions
        v = h
    h = parse_num(h, "left/right border")
    v = parse_num(v, "top/bottom border")
    if h is None and v is None:
        raise argparse.ArgumentTypeError("missing value")
    return h, v
in case + # we cannot read data from the file + with open(path, "rb") as im: + im.read(1) + except IsADirectoryError: + raise argparse.ArgumentTypeError('"%s" is a directory' % path) + except PermissionError: + raise argparse.ArgumentTypeError('"%s" permission denied' % path) + except FileNotFoundError: + raise argparse.ArgumentTypeError('"%s" does not exist' % path) + else: + try: + if os.path.getsize(path) == 0: + raise argparse.ArgumentTypeError('"%s" is empty' % path) + # test-read a byte from it so that we can abort early in case + # we cannot read data from the file + with open(path, "rb") as im: + im.read(1) + except IOError as err: + raise argparse.ArgumentTypeError(str(err)) + except OSError as err: + raise argparse.ArgumentTypeError(str(err)) + result = path + return result + + +def parse_fitarg(string): + for m in FitMode: + if m.name == string.lower(): + return m + raise argparse.ArgumentTypeError("unknown fit mode: %s" % string) + + +def parse_panes(string): + for m in PageMode: + if m.name == string.lower(): + return m + allowed = ", ".join([m.name for m in PageMode]) + raise argparse.ArgumentTypeError( + "Unsupported page mode: %s. Must be one " "of: %s." % (string, allowed) + ) + + +def parse_magnification(string): + for m in Magnification: + if m.name == string.lower(): + return m + try: + return float(string) + except ValueError: + pass + allowed = ", ".join([m.name for m in Magnification]) + raise argparse.ArgumentTypeError( + "Unsupported magnification: %s. Must be " + "a floating point number or one of: %s." % (string, allowed) + ) + + +def parse_layout(string): + for l in PageLayout: + if l.name == string.lower(): + return l + allowed = ", ".join([l.name for l in PageLayout]) + raise argparse.ArgumentTypeError( + "Unsupported page layout: %s. Must be " "one of: %s." 
% (string, allowed) + ) + + +def valid_date(string): + # first try parsing in ISO8601 format + try: + return datetime.strptime(string, "%Y-%m-%d") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M") + except ValueError: + pass + try: + return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S") + except ValueError: + pass + # then try dateutil + try: + from dateutil import parser + except ImportError: + pass + else: + try: + return parser.parse(string) + except TypeError: + pass + # as a last resort, try the local date utility + try: + import subprocess + except ImportError: + pass + else: + try: + utime = subprocess.check_output(["date", "--date", string, "+%s"]) + except subprocess.CalledProcessError: + pass + else: + return datetime.utcfromtimestamp(int(utime)) + raise argparse.ArgumentTypeError("cannot parse date: %s" % string) + + +def main(argv=sys.argv): + rendered_papersizes = "" + for k, v in sorted(papersizes.items()): + rendered_papersizes += " %-8s %s\n" % (papernames[k], v) + + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""\ +Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and +JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000 +images with the only added file size coming from the PDF container itself. +Other raster graphics formats are losslessly stored using the same encoding +that PNG uses. Since PDF does not support images with transparency and since +img2pdf aims to never be lossy, input images with an alpha channel are not +supported. + +The output is sent to standard output so that it can be redirected into a file +or to another program as part of a shell pipe. To directly write the output +into a file, use the -o or --output option. + +Options: +""", + epilog="""\ +Colorspace: + Currently, the colorspace must be forced for JPEG 2000 images that are not in + the RGB colorspace. 
Available colorspace options are based on Python Imaging + Library (PIL) short handles. + + RGB RGB color + L Grayscale + 1 Black and white (internally converted to grayscale) + CMYK CMYK color + CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe) + +Paper sizes: + You can specify the short hand paper size names shown in the first column in + the table below as arguments to the --pagesize and --imgsize options. The + width and height they are mapping to is shown in the second column. Giving + the value in the second column has the same effect as giving the short hand + in the first column. Appending ^T (a caret/circumflex followed by the letter + T) turns the paper size from portrait into landscape. The postfix thus + symbolizes the transpose. The values are case insensitive. + +%s + +Fit options: + The img2pdf options for the --fit argument are shown in the first column in + the table below. The function of these options can be mapped to the geometry + operators of imagemagick. For users who are familiar with imagemagick, the + corresponding operator is shown in the second column. The third column shows + whether or not the aspect ratio is preserved for that option (same as in + imagemagick). Just like imagemagick, img2pdf tries hard to preserve the + aspect ratio, so if the --fit argument is not given, then the default is + "into" which corresponds to the absence of any operator in imagemagick. + The value of the --fit option is case insensitive. + + into | | Y | The default. Width and height values specify maximum + | | | values. + ---------+---+---+---------------------------------------------------------- + fill | ^ | Y | Width and height values specify the minimum values. + ---------+---+---+---------------------------------------------------------- + exact | ! | N | Width and height emphatically given. 
+ ---------+---+---+---------------------------------------------------------- + shrink | > | Y | Shrinks an image with dimensions larger than the given + | | | ones (and otherwise behaves like "into"). + ---------+---+---+---------------------------------------------------------- + enlarge | < | Y | Enlarges an image with dimensions smaller than the given + | | | ones (and otherwise behaves like "into"). + +Argument parsing: + Argument long options can be abbreviated to a prefix if the abbreviation is + unambiguous. That is, the prefix must match a unique option. + + Beware of your shell interpreting argument values as special characters (like + the semicolon in the CMYK;I colorspace option). If in doubt, put the argument + values in single quotes. + + If you want an argument value to start with one or more minus characters, you + must use the long option name and join them with an equal sign like so: + + $ img2pdf --author=--test-- + + If your input file name starts with one or more minus characters, either + separate the input files from the other arguments by two minus signs: + + $ img2pdf -- --my-file-starts-with-two-minuses.jpg + + Or be more explicit about its relative path by prepending a ./: + + $ img2pdf ./--my-file-starts-with-two-minuses.jpg + + The order of non-positional arguments (all arguments other than the input + images) does not matter. + +Examples: + Lines starting with a dollar sign denote commands you can enter into your + terminal. The dollar sign signifies your command prompt. It is not part of + the command you type. + + Convert two scans in JPEG format to a PDF document. + + $ img2pdf --output out.pdf page1.jpg page2.jpg + + Convert a directory of JPEG images into a PDF with printable A4 pages in + landscape mode. On each page, the photo takes the maximum amount of space + while preserving its aspect ratio and a print border of 2 cm on the top and + bottom and 2.5 cm on the left and right hand side. 
+ + $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg + + On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the + original image size if the image is smaller than that. + + $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg + + Prepare a directory of photos to be printed borderless on photo paper with a + 3:2 aspect ratio and rotate each page so that its orientation is the same as + the input image. + + $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg + + Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf + cannot read it from the JPEG2000 file automatically. + + $ img2pdf --output out.pdf --colorspace L input.jp2 + +Written by Johannes 'josch' Schauer + +Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues +""" + % rendered_papersizes, + ) + + parser.add_argument( + "images", + metavar="infile", + type=input_images, + nargs="*", + help="Specifies the input file(s) in any format that can be read by " + "the Python Imaging Library (PIL). If no input images are given, then " + 'a single image is read from standard input. The special filename "-" ' + "can be used once to read an image from standard input. To read a " + 'file in the current directory with the filename "-", pass it to ' + 'img2pdf by explicitly stating its relative path like "./-".', + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Makes the program operate in verbose mode, printing messages on " + "standard error.", + ) + parser.add_argument( + "-V", + "--version", + action="version", + version="%(prog)s " + __version__, + help="Prints version information and exits.", + ) + + outargs = parser.add_argument_group( + title="General output arguments", + description="Arguments controlling the output format.", + ) + + # In Python3 we have to output to sys.stdout.buffer because we write are + # bytes and not strings. 
In certain situations, like when the main + # function is wrapped by contextlib.redirect_stdout(), sys.stdout does not + # have the buffer attribute. Thus we write to sys.stdout by default and + # to sys.stdout.buffer if it exists. + outargs.add_argument( + "-o", + "--output", + metavar="out", + type=argparse.FileType("wb"), + default=sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout, + help="Makes the program output to a file instead of standard output.", + ) + outargs.add_argument( + "-C", + "--colorspace", + metavar="colorspace", + type=parse_colorspacearg, + help=""" +Forces the PIL colorspace. See the epilogue for a list of possible values. +Usually the PDF colorspace would be derived from the color space of the input +image. This option overwrites the automatically detected colorspace from the +input image and thus forces a certain colorspace in the output PDF /ColorSpace +property. This is useful for JPEG 2000 images with a different colorspace than +RGB.""", + ) + + outargs.add_argument( + "-D", + "--nodate", + action="store_true", + help="Suppresses timestamps in the output and thus makes the output " + "deterministic between individual runs. You can also manually " + "set a date using the --moddate and --creationdate options.", + ) + + outargs.add_argument( + "--without-pdfrw", + action="store_true", + help="By default, img2pdf uses the pdfrw library to create the output " + "PDF if pdfrw is available. If you want to use the internal PDF " + "generator of img2pdf even if pdfrw is present, then pass this " + "option. This can be useful if you want to have unicode metadata " + "values which pdfrw does not yet support (See " + "https://github.com/pmaupin/pdfrw/issues/39) or if you want the " + "PDF code to be more human readable.", + ) + + outargs.add_argument( + "--first-frame-only", + action="store_true", + help="By default, img2pdf will convert multi-frame images like " + "multi-page TIFF or animated GIF images to one page per frame. 
" + "This option will only let the first frame of every multi-frame " + "input image be converted into a page in the resulting PDF.", + ) + + outargs.add_argument( + "--pillow-limit-break", + action="store_true", + help="img2pdf uses the Python Imaging Library Pillow to read input " + "images. Pillow limits the maximum input image size to %d pixels " + "to prevent decompression bomb denial of service attacks. If " + "your input image contains more pixels than that, use this " + "option to disable this safety measure during this run of img2pdf" + % Image.MAX_IMAGE_PIXELS, + ) + + sizeargs = parser.add_argument_group( + title="Image and page size and layout arguments", + description="""\ +Every input image will be placed on its own page. The image size is controlled +by the dpi value of the input image or, if unset or missing, the default dpi of +%.2f. By default, each page will have the same size as the image it shows. +Thus, there will be no visible border between the image and the page border by +default. If image size and page size are made different from each other by the +options in this section, the image will always be centered in both dimensions. + +The image size and page size can be explicitly set using the --imgsize and +--pagesize options, respectively. If either dimension of the image size is +specified but the same dimension of the page size is not, then the latter will +be derived from the former using an optional minimal distance between the image +and the page border (given by the --border option) and/or a certain fitting +strategy (given by the --fit option). The converse happens if a dimension of +the page size is set but the same dimension of the image size is not. + +Any length value in below options is represented by the meta variable L which +is a floating point value with an optional unit appended (without a space +between them). 
The default unit is pt (1/72 inch, the PDF unit) and other +allowed units are cm (centimeter), mm (millimeter), and in (inch). + +Any size argument of the format LxL in the options below specifies the width +and height of a rectangle where the first L represents the width and the second +L represents the height with an optional unit following each value as described +above. Either width or height may be omitted. If the height is omitted, the +separating x can be omitted as well. Omitting the width requires to prefix the +height with the separating x. The missing dimension will be chosen so to not +change the image aspect ratio. Instead of giving the width and height +explicitly, you may also specify some (case-insensitive) common page sizes such +as letter and A4. See the epilogue at the bottom for a complete list of the +valid sizes. + +The --fit option scales to fit the image into a rectangle that is either +derived from the --imgsize option or otherwise from the --pagesize option. +If the --border option is given in addition to the --imgsize option while the +--pagesize option is not given, then the page size will be calculated from the +image size, respecting the border setting. If the --border option is given in +addition to the --pagesize option while the --imgsize option is not given, then +the image size will be calculated from the page size, respecting the border +setting. If the --border option is given while both the --pagesize and +--imgsize options are passed, then the --border option will be ignored. + +""" + % default_dpi, + ) + + sizeargs.add_argument( + "-S", + "--pagesize", + metavar="LxL", + type=parse_pagesize_rectarg, + help=""" +Sets the size of the PDF pages. The short-option is the upper case S because +it is an mnemonic for being bigger than the image size.""", + ) + + sizeargs.add_argument( + "-s", + "--imgsize", + metavar="LxL", + type=parse_imgsize_rectarg, + help=""" +Sets the size of the images on the PDF pages. 
In addition, the unit dpi is +allowed which will set the image size as a value of dots per inch. Instead of +a unit, width and height values may also have a percentage sign appended, +indicating a resize of the image by that percentage. The short-option is the +lower case s because it is an mnemonic for being smaller than the page size. +""", + ) + sizeargs.add_argument( + "-b", + "--border", + metavar="L[:L]", + type=parse_borderarg, + help=""" +Specifies the minimal distance between the image border and the PDF page +border. This value Is overwritten by explicit values set by --pagesize or +--imgsize. The value will be used when calculating page dimensions from the +image dimensions or the other way round. One, or two length values can be given +as an argument, separated by a colon. One value specifies the minimal border on +all four sides. Two values specify the minimal border on the top/bottom and +left/right, respectively. It is not possible to specify asymmetric borders +because images will always be centered on the page. +""", + ) + sizeargs.add_argument( + "-f", + "--fit", + metavar="FIT", + type=parse_fitarg, + default=FitMode.into, + help=""" + +If --imgsize is given, fits the image using these dimensions. Otherwise, fit +the image into the dimensions given by --pagesize. FIT is one of into, fill, +exact, shrink and enlarge. The default value is "into". See the epilogue at the +bottom for a description of the FIT options. + +""", + ) + sizeargs.add_argument( + "-a", + "--auto-orient", + action="store_true", + help=""" +If both dimensions of the page are given via --pagesize, conditionally swaps +these dimensions such that the page orientation is the same as the orientation +of the input image. If the orientation of a page gets flipped, then so do the +values set via the --border option. 
+""", + ) + + metaargs = parser.add_argument_group( + title="Arguments setting metadata", + description="Options handling embedded timestamps, title and author " + "information.", + ) + metaargs.add_argument( + "--title", metavar="title", type=str, help="Sets the title metadata value" + ) + metaargs.add_argument( + "--author", metavar="author", type=str, help="Sets the author metadata value" + ) + metaargs.add_argument( + "--creator", metavar="creator", type=str, help="Sets the creator metadata value" + ) + metaargs.add_argument( + "--producer", + metavar="producer", + type=str, + default="img2pdf " + __version__, + help="Sets the producer metadata value " + "(default is: img2pdf " + __version__ + ")", + ) + metaargs.add_argument( + "--creationdate", + metavar="creationdate", + type=valid_date, + help="Sets the UTC creation date metadata value in YYYY-MM-DD or " + "YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " + "understood by python dateutil module or any format understood " + "by `date --date`", + ) + metaargs.add_argument( + "--moddate", + metavar="moddate", + type=valid_date, + help="Sets the UTC modification date metadata value in YYYY-MM-DD " + "or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " + "understood by python dateutil module or any format understood " + "by `date --date`", + ) + metaargs.add_argument( + "--subject", metavar="subject", type=str, help="Sets the subject metadata value" + ) + metaargs.add_argument( + "--keywords", + metavar="kw", + type=str, + nargs="+", + help="Sets the keywords metadata value (can be given multiple times)", + ) + + viewerargs = parser.add_argument_group( + title="PDF viewer arguments", + description="PDF files can specify how they are meant to be " + "presented to the user by a PDF viewer", + ) + + viewerargs.add_argument( + "--viewer-panes", + metavar="PANES", + type=parse_panes, + help="Instruct the PDF viewer which side panes to show. Valid values " + 'are "outlines" and "thumbs". 
It is not possible to specify both ' + "at the same time.", + ) + viewerargs.add_argument( + "--viewer-initial-page", + metavar="NUM", + type=int, + help="Instead of showing the first page, instruct the PDF viewer to " + "show the given page instead. Page numbers start with 1.", + ) + viewerargs.add_argument( + "--viewer-magnification", + metavar="MAG", + type=parse_magnification, + help="Instruct the PDF viewer to open the PDF with a certain zoom " + "level. Valid values are either a floating point number giving " + 'the exact zoom level, "fit" (zoom to fit whole page), "fith" ' + '(zoom to fit page width) and "fitbh" (zoom to fit visible page ' + "width).", + ) + viewerargs.add_argument( + "--viewer-page-layout", + metavar="LAYOUT", + type=parse_layout, + help="Instruct the PDF viewer how to arrange the pages on the screen. " + 'Valid values are "single" (display single pages), "onecolumn" ' + '(one continuous column), "twocolumnright" (two continuous ' + 'columns with odd number pages on the right) and "twocolumnleft" ' + "(two continuous columns with odd numbered pages on the left)", + ) + viewerargs.add_argument( + "--viewer-fit-window", + action="store_true", + help="Instruct the PDF viewer to resize the window to fit the page " "size", + ) + viewerargs.add_argument( + "--viewer-center-window", + action="store_true", + help="Instruct the PDF viewer to center the PDF viewer window", + ) + viewerargs.add_argument( + "--viewer-fullscreen", + action="store_true", + help="Instruct the PDF viewer to open the PDF in fullscreen mode", + ) + + args = parser.parse_args(argv[1:]) + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + if args.pillow_limit_break: + Image.MAX_IMAGE_PIXELS = None + + layout_fun = get_layout_fun( + args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient + ) + + # if no positional arguments were supplied, read a single image from + # standard input + if len(args.images) == 0: + logging.info("reading image from 
standard input") + try: + if PY3: + args.images = [sys.stdin.buffer.read()] + else: + args.images = [sys.stdin.read()] + except KeyboardInterrupt: + exit(0) + + # with the number of pages being equal to the number of images, the + # value passed to --viewer-initial-page must be between 1 and that number + if args.viewer_initial_page is not None: + if args.viewer_initial_page < 1: + parser.print_usage(file=sys.stderr) + logging.error( + "%s: error: argument --viewer-initial-page: must be " + "greater than zero" % parser.prog + ) + exit(2) + if args.viewer_initial_page > len(args.images): + parser.print_usage(file=sys.stderr) + logging.error( + "%s: error: argument --viewer-initial-page: must be " + "less than or equal to the total number of pages" % parser.prog + ) + exit(2) + + try: + convert( + *args.images, + title=args.title, + author=args.author, + creator=args.creator, + producer=args.producer, + creationdate=args.creationdate, + moddate=args.moddate, + subject=args.subject, + keywords=args.keywords, + colorspace=args.colorspace, + nodate=args.nodate, + layout_fun=layout_fun, + viewer_panes=args.viewer_panes, + viewer_initial_page=args.viewer_initial_page, + viewer_magnification=args.viewer_magnification, + viewer_page_layout=args.viewer_page_layout, + viewer_fit_window=args.viewer_fit_window, + viewer_center_window=args.viewer_center_window, + viewer_fullscreen=args.viewer_fullscreen, + with_pdfrw=not args.without_pdfrw, + outputstream=args.output, + first_frame_only=args.first_frame_only + ) + except Exception as e: + logging.error("error: " + str(e)) + if logging.getLogger().isEnabledFor(logging.DEBUG): + import traceback + + traceback.print_exc(file=sys.stderr) + exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/jp2.py b/src/jp2.py new file mode 100644 index 0000000..30edb7e --- /dev/null +++ b/src/jp2.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# +# Copyright (C) 2013 Johannes 'josch' Schauer +# +# this module is heavily based upon 
jpylyzer which is +# KB / National Library of the Netherlands, Open Planets Foundation +# and released under the same license conditions +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +import struct + + +def getBox(data, byteStart, noBytes): + boxLengthValue = struct.unpack(">I", data[byteStart:byteStart+4])[0] + boxType = data[byteStart+4:byteStart+8] + contentsStartOffset = 8 + if boxLengthValue == 1: + boxLengthValue = struct.unpack(">Q", data[byteStart+8:byteStart+16])[0] + contentsStartOffset = 16 + if boxLengthValue == 0: + boxLengthValue = noBytes-byteStart + byteEnd = byteStart + boxLengthValue + boxContents = data[byteStart+contentsStartOffset:byteEnd] + return (boxLengthValue, boxType, byteEnd, boxContents) + + +def parse_ihdr(data): + height = struct.unpack(">I", data[0:4])[0] + width = struct.unpack(">I", data[4:8])[0] + return width, height + + +def parse_colr(data): + meth = struct.unpack(">B", data[0:1])[0] + if meth != 1: + raise Exception("only enumerated color method supported") + enumCS = struct.unpack(">I", data[3:])[0] + if enumCS == 16: + return "RGB" + elif enumCS == 17: + return "L" + else: + raise Exception("only sRGB and greyscale color space is supported, " + "got %d" % enumCS) + + +def parse_resc(data): + hnum, hden, vnum, vden, hexp, vexp = struct.unpack(">HHHHBB", data) + hdpi = ((hnum/hden) * (10**hexp) * 100)/2.54 + vdpi = ((vnum/vden) * (10**vexp) * 
100)/2.54 + return hdpi, vdpi + + +def parse_res(data): + hdpi, vdpi = None, None + noBytes = len(data) + byteStart = 0 + boxLengthValue = 1 # dummy value for while loop condition + while byteStart < noBytes and boxLengthValue != 0: + boxLengthValue, boxType, byteEnd, boxContents = \ + getBox(data, byteStart, noBytes) + if boxType == b'resc': + hdpi, vdpi = parse_resc(boxContents) + break + return hdpi, vdpi + + +def parse_jp2h(data): + width, height, colorspace, hdpi, vdpi = None, None, None, None, None + noBytes = len(data) + byteStart = 0 + boxLengthValue = 1 # dummy value for while loop condition + while byteStart < noBytes and boxLengthValue != 0: + boxLengthValue, boxType, byteEnd, boxContents = \ + getBox(data, byteStart, noBytes) + if boxType == b'ihdr': + width, height = parse_ihdr(boxContents) + elif boxType == b'colr': + colorspace = parse_colr(boxContents) + elif boxType == b'res ': + hdpi, vdpi = parse_res(boxContents) + byteStart = byteEnd + return (width, height, colorspace, hdpi, vdpi) + + +def parsejp2(data): + noBytes = len(data) + byteStart = 0 + boxLengthValue = 1 # dummy value for while loop condition + width, height, colorspace, hdpi, vdpi = None, None, None, None, None + while byteStart < noBytes and boxLengthValue != 0: + boxLengthValue, boxType, byteEnd, boxContents = \ + getBox(data, byteStart, noBytes) + if boxType == b'jp2h': + width, height, colorspace, hdpi, vdpi = parse_jp2h(boxContents) + break + byteStart = byteEnd + if not width: + raise Exception("no width in jp2 header") + if not height: + raise Exception("no height in jp2 header") + if not colorspace: + raise Exception("no colorspace in jp2 header") + # retrieving the dpi is optional so we do not error out if not present + return (width, height, colorspace, hdpi, vdpi) + + +if __name__ == "__main__": + import sys + width, height, colorspace = parsejp2(open(sys.argv[1]).read()) + sys.stdout.write("width = %d" % width) + sys.stdout.write("height = %d" % height) + 
sys.stdout.write("colorspace = %s" % colorspace) diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 0000000..807aa84 --- /dev/null +++ b/src/tests/__init__.py @@ -0,0 +1,732 @@ +import unittest + +import img2pdf +import os +import struct +import sys +import zlib +from PIL import Image +from io import StringIO, BytesIO, TextIOWrapper + +HERE = os.path.dirname(__file__) + +PY3 = sys.version_info[0] >= 3 + +if PY3: + PdfReaderIO = StringIO +else: + PdfReaderIO = BytesIO + +# Recompressing the image stream makes the comparison robust against output +# preserving changes in the zlib compress output bitstream +# (e.g. between different zlib implementations/versions/releases). +# Without this, some img2pdf 0.3.2 tests fail on Fedora 29/aarch64. +# See also: +# https://gitlab.mister-muffin.de/josch/img2pdf/issues/51 +# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/R7GD4L5Z6HELCDAL2RDESWR2F3ZXHWVX/ +def recompress_last_stream(bs): + length_pos = bs.rindex(b'/Length') + li = length_pos + 8 + lj = bs.index(b' ', li) + n = int(bs[li:lj]) + stream_pos = bs.index(b'\nstream\n', lj) + si = stream_pos + 8 + sj = si + n + startx_pos = bs.rindex(b'\nstartxref\n') + xi = startx_pos + 11 + xj = bs.index(b'\n', xi) + m = int(bs[xi:xj]) + + unc_t = zlib.decompress(bs[si:sj]) + t = zlib.compress(unc_t) + + new_len = str(len(t)).encode('ascii') + u = (lj-li) + n + v = len(new_len) + len(t) + off = v - u + + rs = (bs[:li] + new_len + bs[lj:si] + t + bs[sj:xi] + + str(m+off).encode('ascii') + bs[xj:]) + + return rs + +def compare_pdf(outx, outy): + if b'/FlateDecode' in outx: + x = recompress_last_stream(outx) + y = recompress_last_stream(outy) + if x != y: + print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), file=sys.stderr) + print('recompressed outx:\n{}\nouty:\n{}\n'.format(x, y), file=sys.stderr) + return False + else: + if outx != outy: + print('original outx:\n{}\nouty:\n{}\n'.format(outx, outy), 
file=sys.stderr) + return True + +# convert +set date:create +set date:modify -define png:exclude-chunk=time + +# we define some variables so that the table below can be narrower +psl = (972, 504) # --pagesize landscape +psp = (504, 972) # --pagesize portrait +isl = (756, 324) # --imgsize landscape +isp = (324, 756) # --imgsize portrait +border = (162, 270) # --border +poster = (97200, 50400) +# there is no need to have test cases with the same images with inverted +# orientation (landscape/portrait) because --pagesize and --imgsize are +# already inverted +im1 = (864, 288) # imgpx #1 => 648x216 +im2 = (1152, 576) # imgpx #2 => 864x432 +# shortcuts for fit modes +f_into = img2pdf.FitMode.into +f_fill = img2pdf.FitMode.fill +f_exact = img2pdf.FitMode.exact +f_shrink = img2pdf.FitMode.shrink +f_enlarge = img2pdf.FitMode.enlarge +layout_test_cases = [ + # psp=972x504, psl=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit + (None, None, None, f_into, 0, (648, 216), (648, 216), # 000 + (864, 432), (864, 432)), + (None, None, None, f_into, 1, (648, 216), (648, 216), # 001 + (864, 432), (864, 432)), + (None, None, None, f_fill, 0, (648, 216), (648, 216), # 002 + (864, 432), (864, 432)), + (None, None, None, f_fill, 1, (648, 216), (648, 216), # 003 + (864, 432), (864, 432)), + (None, None, None, f_exact, 0, (648, 216), (648, 216), # 004 + (864, 432), (864, 432)), + (None, None, None, f_exact, 1, (648, 216), (648, 216), # 005 + (864, 432), (864, 432)), + (None, None, None, f_shrink, 0, (648, 216), (648, 216), # 006 + (864, 432), (864, 432)), + (None, None, None, f_shrink, 1, (648, 216), (648, 216), # 007 + (864, 432), (864, 432)), + (None, None, None, f_enlarge, 0, (648, 216), (648, 216), # 008 + (864, 432), (864, 432)), + (None, None, None, f_enlarge, 1, (648, 216), (648, 216), # 009 + (864, 432), (864, 432)), + (None, None, border, f_into, 0, (1188, 540), (648, 216), # 010 + (1404, 756), (864, 432)), + (None, 
None, border, f_into, 1, (1188, 540), (648, 216), # 011 + (1404, 756), (864, 432)), + (None, None, border, f_fill, 0, (1188, 540), (648, 216), # 012 + (1404, 756), (864, 432)), + (None, None, border, f_fill, 1, (1188, 540), (648, 216), # 013 + (1404, 756), (864, 432)), + (None, None, border, f_exact, 0, (1188, 540), (648, 216), # 014 + (1404, 756), (864, 432)), + (None, None, border, f_exact, 1, (1188, 540), (648, 216), # 015 + (1404, 756), (864, 432)), + (None, None, border, f_shrink, 0, (1188, 540), (648, 216), # 016 + (1404, 756), (864, 432)), + (None, None, border, f_shrink, 1, (1188, 540), (648, 216), # 017 + (1404, 756), (864, 432)), + (None, None, border, f_enlarge, 0, (1188, 540), (648, 216), # 018 + (1404, 756), (864, 432)), + (None, None, border, f_enlarge, 1, (1188, 540), (648, 216), # 019 + (1404, 756), (864, 432)), + (None, isp, None, f_into, 0, (324, 108), (324, 108), # 020 + (324, 162), (324, 162)), + (None, isp, None, f_into, 1, (324, 108), (324, 108), # 021 + (324, 162), (324, 162)), + (None, isp, None, f_fill, 0, (2268, 756), (2268, 756), # 022 + (1512, 756), (1512, 756)), + (None, isp, None, f_fill, 1, (2268, 756), (2268, 756), # 023 + (1512, 756), (1512, 756)), + (None, isp, None, f_exact, 0, (324, 756), (324, 756), # 024 + (324, 756), (324, 756)), + (None, isp, None, f_exact, 1, (324, 756), (324, 756), # 025 + (324, 756), (324, 756)), + (None, isp, None, f_shrink, 0, (324, 108), (324, 108), # 026 + (324, 162), (324, 162)), + (None, isp, None, f_shrink, 1, (324, 108), (324, 108), # 027 + (324, 162), (324, 162)), + (None, isp, None, f_enlarge, 0, (648, 216), (648, 216), # 028 + (864, 432), (864, 432)), + (None, isp, None, f_enlarge, 1, (648, 216), (648, 216), # 029 + (864, 432), (864, 432)), + (None, isp, border, f_into, 0, (864, 432), (324, 108), # 030 + (864, 486), (324, 162)), + (None, isp, border, f_into, 1, (864, 432), (324, 108), # 031 + (864, 486), (324, 162)), + (None, isp, border, f_fill, 0, (2808, 1080), (2268, 756), # 032 + (2052, 
1080), (1512, 756)), + (None, isp, border, f_fill, 1, (2808, 1080), (2268, 756), # 033 + (2052, 1080), (1512, 756)), + (None, isp, border, f_exact, 0, (864, 1080), (324, 756), # 034 + (864, 1080), (324, 756)), + (None, isp, border, f_exact, 1, (864, 1080), (324, 756), # 035 + (864, 1080), (324, 756)), + (None, isp, border, f_shrink, 0, (864, 432), (324, 108), # 036 + (864, 486), (324, 162)), + (None, isp, border, f_shrink, 1, (864, 432), (324, 108), # 037 + (864, 486), (324, 162)), + (None, isp, border, f_enlarge, 0, (1188, 540), (648, 216), # 038 + (1404, 756), (864, 432)), + (None, isp, border, f_enlarge, 1, (1188, 540), (648, 216), # 039 + (1404, 756), (864, 432)), + (None, isl, None, f_into, 0, (756, 252), (756, 252), # 040 + (648, 324), (648, 324)), + (None, isl, None, f_into, 1, (756, 252), (756, 252), # 041 + (648, 324), (648, 324)), + (None, isl, None, f_fill, 0, (972, 324), (972, 324), # 042 + (756, 378), (756, 378)), + (None, isl, None, f_fill, 1, (972, 324), (972, 324), # 043 + (756, 378), (756, 378)), + (None, isl, None, f_exact, 0, (756, 324), (756, 324), # 044 + (756, 324), (756, 324)), + (None, isl, None, f_exact, 1, (756, 324), (756, 324), # 045 + (756, 324), (756, 324)), + (None, isl, None, f_shrink, 0, (648, 216), (648, 216), # 046 + (648, 324), (648, 324)), + (None, isl, None, f_shrink, 1, (648, 216), (648, 216), # 047 + (648, 324), (648, 324)), + (None, isl, None, f_enlarge, 0, (756, 252), (756, 252), # 048 + (864, 432), (864, 432)), + (None, isl, None, f_enlarge, 1, (756, 252), (756, 252), # 049 + (864, 432), (864, 432)), + # psp=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (None, isl, border, f_into, 0, (1296, 576), (756, 252), # 050 + (1188, 648), (648, 324)), + (None, isl, border, f_into, 1, (1296, 576), (756, 252), # 051 + (1188, 648), (648, 324)), + (None, isl, border, f_fill, 0, (1512, 648), (972, 324), # 052 + (1296, 702), (756, 378)), + (None, isl, 
border, f_fill, 1, (1512, 648), (972, 324), # 053 + (1296, 702), (756, 378)), + (None, isl, border, f_exact, 0, (1296, 648), (756, 324), # 054 + (1296, 648), (756, 324)), + (None, isl, border, f_exact, 1, (1296, 648), (756, 324), # 055 + (1296, 648), (756, 324)), + (None, isl, border, f_shrink, 0, (1188, 540), (648, 216), # 056 + (1188, 648), (648, 324)), + (None, isl, border, f_shrink, 1, (1188, 540), (648, 216), # 057 + (1188, 648), (648, 324)), + (None, isl, border, f_enlarge, 0, (1296, 576), (756, 252), # 058 + (1404, 756), (864, 432)), + (None, isl, border, f_enlarge, 1, (1296, 576), (756, 252), # 059 + (1404, 756), (864, 432)), + (psp, None, None, f_into, 0, (504, 972), (504, 168), # 060 + (504, 972), (504, 252)), + (psp, None, None, f_into, 1, (972, 504), (972, 324), # 061 + (972, 504), (972, 486)), + (psp, None, None, f_fill, 0, (504, 972), (2916, 972), # 062 + (504, 972), (1944, 972)), + (psp, None, None, f_fill, 1, (972, 504), (1512, 504), # 063 + (972, 504), (1008, 504)), + (psp, None, None, f_exact, 0, (504, 972), (504, 972), # 064 + (504, 972), (504, 972)), + (psp, None, None, f_exact, 1, (972, 504), (972, 504), # 065 + (972, 504), (972, 504)), + (psp, None, None, f_shrink, 0, (504, 972), (504, 168), # 066 + (504, 972), (504, 252)), + (psp, None, None, f_shrink, 1, (972, 504), (648, 216), # 067 + (972, 504), (864, 432)), + (psp, None, None, f_enlarge, 0, (504, 972), (648, 216), # 068 + (504, 972), (864, 432)), + (psp, None, None, f_enlarge, 1, (972, 504), (972, 324), # 069 + (972, 504), (972, 486)), + (psp, None, border, f_into, 0, None, None, None, None), # 070 + (psp, None, border, f_into, 1, None, None, None, None), # 071 + (psp, None, border, f_fill, 0, (504, 972), (1944, 648), # 072 + (504, 972), (1296, 648)), + (psp, None, border, f_fill, 1, (972, 504), (648, 216), # 073 + (972, 504), (648, 324)), + (psp, None, border, f_exact, 0, None, None, None, None), # 074 + (psp, None, border, f_exact, 1, None, None, None, None), # 075 + (psp, None, border, 
f_shrink, 0, None, None, None, None), # 076 + (psp, None, border, f_shrink, 1, None, None, None, None), # 077 + (psp, None, border, f_enlarge, 0, (504, 972), (648, 216), # 078 + (504, 972), (864, 432)), + (psp, None, border, f_enlarge, 1, (972, 504), (648, 216), # 079 + (972, 504), (864, 432)), + (psp, isp, None, f_into, 0, (504, 972), (324, 108), # 080 + (504, 972), (324, 162)), + (psp, isp, None, f_into, 1, (972, 504), (324, 108), # 081 + (972, 504), (324, 162)), + (psp, isp, None, f_fill, 0, (504, 972), (2268, 756), # 082 + (504, 972), (1512, 756)), + (psp, isp, None, f_fill, 1, (972, 504), (2268, 756), # 083 + (972, 504), (1512, 756)), + (psp, isp, None, f_exact, 0, (504, 972), (324, 756), # 084 + (504, 972), (324, 756)), + (psp, isp, None, f_exact, 1, (972, 504), (324, 756), # 085 + (972, 504), (324, 756)), + (psp, isp, None, f_shrink, 0, (504, 972), (324, 108), # 086 + (504, 972), (324, 162)), + (psp, isp, None, f_shrink, 1, (972, 504), (324, 108), # 087 + (972, 504), (324, 162)), + (psp, isp, None, f_enlarge, 0, (504, 972), (648, 216), # 088 + (504, 972), (864, 432)), + (psp, isp, None, f_enlarge, 1, (972, 504), (648, 216), # 089 + (972, 504), (864, 432)), + (psp, isp, border, f_into, 0, (504, 972), (324, 108), # 090 + (504, 972), (324, 162)), + (psp, isp, border, f_into, 1, (972, 504), (324, 108), # 091 + (972, 504), (324, 162)), + (psp, isp, border, f_fill, 0, (504, 972), (2268, 756), # 092 + (504, 972), (1512, 756)), + (psp, isp, border, f_fill, 1, (972, 504), (2268, 756), # 093 + (972, 504), (1512, 756)), + (psp, isp, border, f_exact, 0, (504, 972), (324, 756), # 094 + (504, 972), (324, 756)), + (psp, isp, border, f_exact, 1, (972, 504), (324, 756), # 095 + (972, 504), (324, 756)), + (psp, isp, border, f_shrink, 0, (504, 972), (324, 108), # 096 + (504, 972), (324, 162)), + (psp, isp, border, f_shrink, 1, (972, 504), (324, 108), # 097 + (972, 504), (324, 162)), + (psp, isp, border, f_enlarge, 0, (504, 972), (648, 216), # 098 + (504, 972), (864, 432)), + 
(psp, isp, border, f_enlarge, 1, (972, 504), (648, 216), # 099 + (972, 504), (864, 432)), + # psp=972x504, psp=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (psp, isl, None, f_into, 0, (504, 972), (756, 252), # 100 + (504, 972), (648, 324)), + (psp, isl, None, f_into, 1, (972, 504), (756, 252), # 101 + (972, 504), (648, 324)), + (psp, isl, None, f_fill, 0, (504, 972), (972, 324), # 102 + (504, 972), (756, 378)), + (psp, isl, None, f_fill, 1, (972, 504), (972, 324), # 103 + (972, 504), (756, 378)), + (psp, isl, None, f_exact, 0, (504, 972), (756, 324), # 104 + (504, 972), (756, 324)), + (psp, isl, None, f_exact, 1, (972, 504), (756, 324), # 105 + (972, 504), (756, 324)), + (psp, isl, None, f_shrink, 0, (504, 972), (648, 216), # 106 + (504, 972), (648, 324)), + (psp, isl, None, f_shrink, 1, (972, 504), (648, 216), # 107 + (972, 504), (648, 324)), + (psp, isl, None, f_enlarge, 0, (504, 972), (756, 252), # 108 + (504, 972), (864, 432)), + (psp, isl, None, f_enlarge, 1, (972, 504), (756, 252), # 109 + (972, 504), (864, 432)), + (psp, isl, border, f_into, 0, (504, 972), (756, 252), # 110 + (504, 972), (648, 324)), + (psp, isl, border, f_into, 1, (972, 504), (756, 252), # 111 + (972, 504), (648, 324)), + (psp, isl, border, f_fill, 0, (504, 972), (972, 324), # 112 + (504, 972), (756, 378)), + (psp, isl, border, f_fill, 1, (972, 504), (972, 324), # 113 + (972, 504), (756, 378)), + (psp, isl, border, f_exact, 0, (504, 972), (756, 324), # 114 + (504, 972), (756, 324)), + (psp, isl, border, f_exact, 1, (972, 504), (756, 324), # 115 + (972, 504), (756, 324)), + (psp, isl, border, f_shrink, 0, (504, 972), (648, 216), # 116 + (504, 972), (648, 324)), + (psp, isl, border, f_shrink, 1, (972, 504), (648, 216), # 117 + (972, 504), (648, 324)), + (psp, isl, border, f_enlarge, 0, (504, 972), (756, 252), # 118 + (504, 972), (864, 432)), + (psp, isl, border, f_enlarge, 1, (972, 504), (756, 252), # 119 + (972, 504), 
(864, 432)), + (psl, None, None, f_into, 0, (972, 504), (972, 324), # 120 + (972, 504), (972, 486)), + (psl, None, None, f_into, 1, (972, 504), (972, 324), # 121 + (972, 504), (972, 486)), + (psl, None, None, f_fill, 0, (972, 504), (1512, 504), # 122 + (972, 504), (1008, 504)), + (psl, None, None, f_fill, 1, (972, 504), (1512, 504), # 123 + (972, 504), (1008, 504)), + (psl, None, None, f_exact, 0, (972, 504), (972, 504), # 124 + (972, 504), (972, 504)), + (psl, None, None, f_exact, 1, (972, 504), (972, 504), # 125 + (972, 504), (972, 504)), + (psl, None, None, f_shrink, 0, (972, 504), (648, 216), # 126 + (972, 504), (864, 432)), + (psl, None, None, f_shrink, 1, (972, 504), (648, 216), # 127 + (972, 504), (864, 432)), + (psl, None, None, f_enlarge, 0, (972, 504), (972, 324), # 128 + (972, 504), (972, 486)), + (psl, None, None, f_enlarge, 1, (972, 504), (972, 324), # 129 + (972, 504), (972, 486)), + (psl, None, border, f_into, 0, (972, 504), (432, 144), # 130 + (972, 504), (360, 180)), + (psl, None, border, f_into, 1, (972, 504), (432, 144), # 131 + (972, 504), (360, 180)), + (psl, None, border, f_fill, 0, (972, 504), (540, 180), # 132 + (972, 504), (432, 216)), + (psl, None, border, f_fill, 1, (972, 504), (540, 180), # 133 + (972, 504), (432, 216)), + (psl, None, border, f_exact, 0, (972, 504), (432, 180), # 134 + (972, 504), (432, 180)), + (psl, None, border, f_exact, 1, (972, 504), (432, 180), # 135 + (972, 504), (432, 180)), + (psl, None, border, f_shrink, 0, (972, 504), (432, 144), # 136 + (972, 504), (360, 180)), + (psl, None, border, f_shrink, 1, (972, 504), (432, 144), # 137 + (972, 504), (360, 180)), + (psl, None, border, f_enlarge, 0, (972, 504), (648, 216), # 138 + (972, 504), (864, 432)), + (psl, None, border, f_enlarge, 1, (972, 504), (648, 216), # 139 + (972, 504), (864, 432)), + (psl, isp, None, f_into, 0, (972, 504), (324, 108), # 140 + (972, 504), (324, 162)), + (psl, isp, None, f_into, 1, (972, 504), (324, 108), # 141 + (972, 504), (324, 162)), + 
(psl, isp, None, f_fill, 0, (972, 504), (2268, 756), # 142 + (972, 504), (1512, 756)), + (psl, isp, None, f_fill, 1, (972, 504), (2268, 756), # 143 + (972, 504), (1512, 756)), + (psl, isp, None, f_exact, 0, (972, 504), (324, 756), # 144 + (972, 504), (324, 756)), + (psl, isp, None, f_exact, 1, (972, 504), (324, 756), # 145 + (972, 504), (324, 756)), + (psl, isp, None, f_shrink, 0, (972, 504), (324, 108), # 146 + (972, 504), (324, 162)), + (psl, isp, None, f_shrink, 1, (972, 504), (324, 108), # 147 + (972, 504), (324, 162)), + (psl, isp, None, f_enlarge, 0, (972, 504), (648, 216), # 148 + (972, 504), (864, 432)), + (psl, isp, None, f_enlarge, 1, (972, 504), (648, 216), # 149 + (972, 504), (864, 432)), + # psp=972x504, psl=504x972, isl=756x324, isp=324x756, border=162:270 + # --pagesize --border -a pagepdf imgpdf + # --imgsize --fit imgpx + (psl, isp, border, f_into, 0, (972, 504), (324, 108), # 150 + (972, 504), (324, 162)), + (psl, isp, border, f_into, 1, (972, 504), (324, 108), # 151 + (972, 504), (324, 162)), + (psl, isp, border, f_fill, 0, (972, 504), (2268, 756), # 152 + (972, 504), (1512, 756)), + (psl, isp, border, f_fill, 1, (972, 504), (2268, 756), # 153 + (972, 504), (1512, 756)), + (psl, isp, border, f_exact, 0, (972, 504), (324, 756), # 154 + (972, 504), (324, 756)), + (psl, isp, border, f_exact, 1, (972, 504), (324, 756), # 155 + (972, 504), (324, 756)), + (psl, isp, border, f_shrink, 0, (972, 504), (324, 108), # 156 + (972, 504), (324, 162)), + (psl, isp, border, f_shrink, 1, (972, 504), (324, 108), # 157 + (972, 504), (324, 162)), + (psl, isp, border, f_enlarge, 0, (972, 504), (648, 216), # 158 + (972, 504), (864, 432)), + (psl, isp, border, f_enlarge, 1, (972, 504), (648, 216), # 159 + (972, 504), (864, 432)), + (psl, isl, None, f_into, 0, (972, 504), (756, 252), # 160 + (972, 504), (648, 324)), + (psl, isl, None, f_into, 1, (972, 504), (756, 252), # 161 + (972, 504), (648, 324)), + (psl, isl, None, f_fill, 0, (972, 504), (972, 324), # 162 + (972, 
def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4):
    """Build a minimal little-endian TIFF header for a raw CCITT payload.

    The returned bytes consist of the 8 byte file header followed by a
    single IFD with 8 entries. Appending ``img_size`` bytes of CCITT data
    directly after the header yields a single-strip TIFF file, because the
    StripOffsets entry points at the first byte after the header.

    :param width: image width in pixels
    :param height: image height in pixels (also used as RowsPerStrip)
    :param img_size: size of the CCITT payload in bytes
    :param ccitt_group: value of the Compression tag (default 4)
    :returns: the header as bytes
    :raises struct.error: if a value does not fit into its field
    """
    # Quick and dirty TIFF header builder from
    # https://stackoverflow.com/questions/2641770
    #
    # The trailing field is the offset of the next IFD which is a 4 byte
    # value. It was packed as a 2 byte short before, which made the field
    # overlap with the first two bytes of the image payload.
    tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'l'
    return struct.pack(
        tiff_header_struct,
        b'II',  # Byte order indication: little endian
        42,  # Version number (always 42)
        8,  # Offset to first IFD
        8,  # Number of tags in IFD
        256, 4, 1, width,  # ImageWidth, LONG, 1, width
        257, 4, 1, height,  # ImageLength, LONG, 1, length
        258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
        259, 3, 1, ccitt_group,  # Compression, SHORT, 1, 4 = CCITT Group 4
        262, 3, 1, 1,  # PhotometricInterpretation, SHORT, 1, 1 = BlackIsZero
        273, 4, 1, struct.calcsize(
            tiff_header_struct),  # StripOffsets, LONG, 1, len of header
        278, 4, 1, height,  # RowsPerStrip, LONG, 1, length
        279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
        0  # offset of next IFD, 0 = this is the last IFD
    )
ispdf=ispdf1): + layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf, + im1) + setattr(TestImg2Pdf, "test_layout_%03d_im1" % i, layout_handler_im1) + + def layout_handler_im2(self, psopt=psopt, isopt=isopt, border=border, + fit=fit, ao=ao, pspdf=pspdf2, ispdf=ispdf2): + layout_handler(self, psopt, isopt, border, fit, ao, pspdf, ispdf, + im2) + setattr(TestImg2Pdf, "test_layout_%03d_im2" % i, layout_handler_im2) + + files = os.listdir(os.path.join(HERE, "input")) + for with_pdfrw, test_name in [(a, b) for a in [True, False] + for b in files]: + # we do not test animation.gif with pdfrw because it doesn't support + # saving hexadecimal palette data + if test_name == 'animation.gif' and with_pdfrw: + continue + inputf = os.path.join(HERE, "input", test_name) + if not os.path.isfile(inputf): + continue + outputf = os.path.join(HERE, "output", test_name+".pdf") + assert os.path.isfile(outputf) + + def handle(self, f=inputf, out=outputf, with_pdfrw=with_pdfrw): + with open(f, "rb") as inf: + orig_imgdata = inf.read() + output = img2pdf.convert(orig_imgdata, nodate=True, + with_pdfrw=with_pdfrw) + from pdfrw import PdfReader, PdfName, PdfWriter + from pdfrw.py23_diffs import convert_load, convert_store + x = PdfReader(PdfReaderIO(convert_load(output))) + self.assertEqual(sorted(x.keys()), [PdfName.Info, PdfName.Root, + PdfName.Size]) + self.assertIn(x.Root.Pages.Count, ('1', '2')) + if len(x.Root.Pages.Kids) == '1': + self.assertEqual(x.Size, '7') + self.assertEqual(len(x.Root.Pages.Kids), 1) + elif len(x.Root.Pages.Kids) == '2': + self.assertEqual(x.Size, '10') + self.assertEqual(len(x.Root.Pages.Kids), 2) + self.assertEqual(x.Info, {}) + self.assertEqual(sorted(x.Root.keys()), [PdfName.Pages, + PdfName.Type]) + self.assertEqual(x.Root.Type, PdfName.Catalog) + self.assertEqual(sorted(x.Root.Pages.keys()), + [PdfName.Count, PdfName.Kids, PdfName.Type]) + self.assertEqual(x.Root.Pages.Type, PdfName.Pages) + orig_img = Image.open(f) + for pagenum in 
range(len(x.Root.Pages.Kids)): + # retrieve the original image frame that this page was + # generated from + orig_img.seek(pagenum) + cur_page = x.Root.Pages.Kids[pagenum] + + ndpi = orig_img.info.get("dpi", (96.0, 96.0)) + # In python3, the returned dpi value for some tiff images will + # not be an integer but a float. To make the behaviour of + # img2pdf the same between python2 and python3, we convert that + # float into an integer by rounding. + # Search online for the 72.009 dpi problem for more info. + ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) + imgwidthpx, imgheightpx = orig_img.size + pagewidth = 72.0*imgwidthpx/ndpi[0] + pageheight = 72.0*imgheightpx/ndpi[1] + + def format_float(f): + if int(f) == f: + return str(int(f)) + else: + return ("%.4f" % f).rstrip("0") + + self.assertEqual(sorted(cur_page.keys()), + [PdfName.Contents, PdfName.MediaBox, + PdfName.Parent, PdfName.Resources, + PdfName.Type]) + self.assertEqual(cur_page.MediaBox, + ['0', '0', format_float(pagewidth), + format_float(pageheight)]) + self.assertEqual(cur_page.Parent, x.Root.Pages) + self.assertEqual(cur_page.Type, PdfName.Page) + self.assertEqual(cur_page.Resources.keys(), + [PdfName.XObject]) + self.assertEqual(cur_page.Resources.XObject.keys(), + [PdfName.Im0]) + self.assertEqual(cur_page.Contents.keys(), + [PdfName.Length]) + self.assertEqual(cur_page.Contents.Length, + str(len(cur_page.Contents.stream))) + self.assertEqual(cur_page.Contents.stream, + "q\n%.4f 0 0 %.4f 0.0000 0.0000 cm\n" + "/Im0 Do\nQ" % (pagewidth, pageheight)) + + imgprops = cur_page.Resources.XObject.Im0 + + # test if the filter is valid: + self.assertIn( + imgprops.Filter, [PdfName.DCTDecode, PdfName.JPXDecode, + PdfName.FlateDecode, + [PdfName.CCITTFaxDecode]]) + + # test if the image has correct size + self.assertEqual(imgprops.Width, str(orig_img.size[0])) + self.assertEqual(imgprops.Height, str(orig_img.size[1])) + # if the input file is a jpeg then it should've been copied + # verbatim into the PDF 
+ if imgprops.Filter in [PdfName.DCTDecode, + PdfName.JPXDecode]: + self.assertEqual( + cur_page.Resources.XObject.Im0.stream, + convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.CCITTFaxDecode]: + tiff_header = tiff_header_for_ccitt( + int(imgprops.Width), int(imgprops.Height), + int(imgprops.Length), 4) + imgio = BytesIO() + imgio.write(tiff_header) + imgio.write(convert_store( + cur_page.Resources.XObject.Im0.stream)) + imgio.seek(0) + im = Image.open(imgio) + self.assertEqual(im.tobytes(), orig_img.tobytes()) + try: + im.close() + except AttributeError: + pass + + elif imgprops.Filter == PdfName.FlateDecode: + # otherwise, the data is flate encoded and has to be equal + # to the pixel data of the input image + imgdata = zlib.decompress( + convert_store(cur_page.Resources.XObject.Im0.stream)) + if imgprops.DecodeParms: + if orig_img.format == 'PNG': + pngidat, palette = img2pdf.parse_png(orig_imgdata) + elif orig_img.format == 'TIFF' \ + and orig_img.info['compression'] == "group4": + offset, length = \ + img2pdf.ccitt_payload_location_from_pil( + orig_img) + pngidat = orig_imgdata[offset:offset+length] + else: + pngbuffer = BytesIO() + orig_img.save(pngbuffer, format="png") + pngidat, palette = img2pdf.parse_png( + pngbuffer.getvalue()) + self.assertEqual(zlib.decompress(pngidat), imgdata) + else: + colorspace = imgprops.ColorSpace + if colorspace == PdfName.DeviceGray: + colorspace = 'L' + elif colorspace == PdfName.DeviceRGB: + colorspace = 'RGB' + elif colorspace == PdfName.DeviceCMYK: + colorspace = 'CMYK' + else: + raise Exception("invalid colorspace") + im = Image.frombytes(colorspace, + (int(imgprops.Width), + int(imgprops.Height)), + imgdata) + if orig_img.mode == '1': + self.assertEqual(im.tobytes(), + orig_img.convert("L").tobytes()) + elif orig_img.mode not in ("RGB", "L", "CMYK", + "CMYK;I"): + self.assertEqual(im.tobytes(), + orig_img.convert("RGB").tobytes()) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does + # not have the 
close() method + try: + im.close() + except AttributeError: + pass + # now use pdfrw to parse and then write out both pdfs and check the + # result for equality + y = PdfReader(out) + outx = BytesIO() + outy = BytesIO() + xwriter = PdfWriter() + ywriter = PdfWriter() + xwriter.trailer = x + ywriter.trailer = y + xwriter.write(outx) + ywriter.write(outy) + self.assertEqual(compare_pdf(outx.getvalue(), outy.getvalue()), True) + # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the + # close() method + try: + orig_img.close() + except AttributeError: + pass + if with_pdfrw: + setattr(TestImg2Pdf, "test_%s_with_pdfrw" % test_name, handle) + else: + setattr(TestImg2Pdf, "test_%s_without_pdfrw" % test_name, handle) + + return unittest.TestSuite(( + unittest.makeSuite(TestImg2Pdf), + unittest.makeSuite(CommandLineTests), + )) diff --git a/src/tests/input/CMYK.jpg b/src/tests/input/CMYK.jpg new file mode 100644 index 0000000..44213a8 Binary files /dev/null and b/src/tests/input/CMYK.jpg differ diff --git a/src/tests/input/CMYK.tif b/src/tests/input/CMYK.tif new file mode 100644 index 0000000..8e3803e Binary files /dev/null and b/src/tests/input/CMYK.tif differ diff --git a/src/tests/input/animation.gif b/src/tests/input/animation.gif new file mode 100644 index 0000000..af4b278 Binary files /dev/null and b/src/tests/input/animation.gif differ diff --git a/src/tests/input/gray.png b/src/tests/input/gray.png new file mode 100644 index 0000000..48247fd Binary files /dev/null and b/src/tests/input/gray.png differ diff --git a/src/tests/input/mono.png b/src/tests/input/mono.png new file mode 100644 index 0000000..59b17ad Binary files /dev/null and b/src/tests/input/mono.png differ diff --git a/src/tests/input/mono.tif b/src/tests/input/mono.tif new file mode 100644 index 0000000..3718d52 Binary files /dev/null and b/src/tests/input/mono.tif differ diff --git a/src/tests/input/normal.jpg b/src/tests/input/normal.jpg new file mode 100644 index 0000000..2c036e9 Binary 
# similar IMG1 IMG2
#
# Succeeds if ImageMagick compare reports a PSNR of at least $min_psnr for
# the two images. Prints a diagnostic and returns 1 if compare printed
# nothing or if the PSNR is too low. Exits the shell with status 2 if the
# reported PSNR is exactly "0".
# Sets the global variables psnr and min_psnr (POSIX sh has no locals).
similar()
{
	# compare prints the metric to stderr; its exit status is ignored here
	psnr=$(compare -metric PSNR "$1" "$2" null: 2>&1 || true)
	if [ -z "$psnr" ]; then
		echo "compare failed"
		return 1
	fi

	# PSNR of zero means that they are identical
	if [ "$psnr" = 0 ]; then
		echo "images are equal -- don't use similar() but require exactness"
		exit 2
	fi

	# The lower PSNR value, the fewer the similarities
	# The lowest (and worst) value is 1.0
	min_psnr=50
	# The values are passed as printf arguments and not as the format
	# string so that a '%' or '\' in the output of compare cannot be
	# misinterpreted. The short options of sort and head are used because
	# the long ones only exist in GNU coreutils.
	if [ "$min_psnr" != "$(printf '%s\n%s\n' "$psnr" "$min_psnr" | sort -g | head -n 1)" ]; then
		echo "pdf wrongly rendered"
		return 1
	fi
	return 0
}

# compare_rendered PDF IMG [GSDEVICE]
#
# Renders PDF with ghostscript, poppler and mupdf and compares each result
# with IMG. GSDEVICE is the ghostscript output device and defaults to png16m.
# Sets the global variables pdf, img and gsdevice.
compare_rendered()
{
	pdf="$1"
	img="$2"
	gsdevice=png16m
	if [ "$#" -eq 3 ]; then
		gsdevice="$3"
	fi

	compare_ghostscript "$pdf" "$img" "$gsdevice"

	compare_poppler "$pdf" "$img"

	compare_mupdf "$pdf" "$img"
}

# compare_ghostscript PDF IMG GSDEVICE
#
# Renders PDF at 96 dpi with the ghostscript device GSDEVICE into $tempdir
# and compares the first page with IMG using the AE metric. The rendered
# page is removed afterwards.
compare_ghostscript()
{
	pdf="$1"
	img="$2"
	gsdevice="$3"
	gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE="$gsdevice" -r96 -sOutputFile="$tempdir/gs-%00d.png" "$pdf"
	compare -metric AE "$img" "$tempdir/gs-1.png" null: 2>/dev/null
	rm "$tempdir/gs-1.png"
}

# compare_poppler PDF IMG
#
# Renders PDF at 96 dpi with pdftocairo into $tempdir and compares the first
# page with IMG using the AE metric. The rendered page is removed afterwards.
compare_poppler()
{
	pdf="$1"
	img="$2"
	pdftocairo -r 96 -png "$pdf" "$tempdir/poppler"
	compare -metric AE "$img" "$tempdir/poppler-1.png" null: 2>/dev/null
	rm "$tempdir/poppler-1.png"
}

# compare_mupdf PDF IMG
#
# Renders PDF at 96 dpi with mutool into $tempdir and compares the result
# with IMG using the AE metric. The rendered page is removed afterwards.
compare_mupdf()
{
	pdf="$1"
	img="$2"
	mutool draw -o "$tempdir/mupdf.png" -r 96 "$pdf" 2>/dev/null
	compare -metric AE "$img" "$tempdir/mupdf.png" null: 2>/dev/null
	rm "$tempdir/mupdf.png"
}

# compare_pdfimages PDF IMG
#
# Extracts the images of PDF as PNG with pdfimages into $tempdir and compares
# the first one with IMG using the AE metric. The extracted image is removed
# afterwards.
compare_pdfimages()
{
	pdf="$1"
	img="$2"
	pdfimages -png "$pdf" "$tempdir/images"
	compare -metric AE "$img" "$tempdir/images-000.png" null: 2>/dev/null
	rm "$tempdir/images-000.png"
}

# error
#
# Reports the number of the failed test and the location of the intermediate
# data and exits with status 1. It is meant to be installed as a trap
# handler.
error()
{
	# $j is only assigned after the test images have been generated. With
	# "set -u" in effect, expanding it unguarded would abort this handler
	# before anything is printed.
	echo "test ${j:-<setup>} failed"
	echo "intermediate data is left in $tempdir"
	exit 1
}
blue -draw 'circle 21,39 24,57' -gaussian-blur 0x3 \) \ + -compose plus -composite \ + \) -compose plus -composite \ + -strip \ + "$tempdir/alpha.png" + +convert "$tempdir/alpha.png" -background black -alpha remove -alpha off -strip "$tempdir/normal16.png" + +convert "$tempdir/normal16.png" -depth 8 -strip "$tempdir/normal.png" + +convert "$tempdir/normal.png" -negate -strip "$tempdir/inverse.png" + +convert "$tempdir/normal16.png" -colorspace Gray -depth 16 -strip "$tempdir/gray16.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 256 -depth 8 -strip "$tempdir/gray8.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 16 -depth 4 -strip "$tempdir/gray4.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 4 -depth 2 -strip "$tempdir/gray2.png" +convert "$tempdir/normal16.png" -colorspace Gray -dither FloydSteinberg -colors 2 -depth 1 -strip "$tempdir/gray1.png" + +# use "-define png:exclude-chunk=bkgd" because otherwise, imagemagick will +# add the background color (white) as an additional entry to the palette +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 2 -define png:exclude-chunk=bkgd -strip "$tempdir/palette1.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 4 -define png:exclude-chunk=bkgd -strip "$tempdir/palette2.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 16 -define png:exclude-chunk=bkgd -strip "$tempdir/palette4.png" +convert "$tempdir/normal.png" -dither FloydSteinberg -colors 256 -define png:exclude-chunk=bkgd -strip "$tempdir/palette8.png" + +cat << END | ( cd "$tempdir"; md5sum --check --status - ) +a99ef2a356c315090b6939fa4ce70516 alpha.png +0df21ebbce5292654119b17f6e52bc81 gray16.png +6faee81b8db446caa5004ad71bddcb5b gray1.png +97e423da517ede069348484a1283aa6c gray2.png +cbed1b6da5183aec0b86909e82b77c41 gray4.png +c0df42fdd69ae2a16ad0c23adb39895e gray8.png +ac6bb850fb5aaee9fa7dcb67525cd0fc 
# use img2pdfprog environment variable if it is set
# (an empty value counts as set, only an unset variable gets the default)
if [ -z "${img2pdfprog+x}" ]; then
	img2pdfprog=src/img2pdf.py
fi

# img2pdf INPUT OUTPUT
#
# Runs $img2pdfprog on the image INPUT and writes the resulting PDF to
# OUTPUT. Anything the program prints to stderr is discarded. The return
# status is the one of $img2pdfprog.
img2pdf()
{
	# we use --without-pdfrw to better "grep" the result and because we
	# cannot write palette based images otherwise
	#
	# $img2pdfprog is left unquoted as before, so a value consisting of
	# several words is split into a command and its leading arguments.
	# NOTE(review): confirm that callers rely on this splitting.
	# shellcheck disable=SC2086
	$img2pdfprog --without-pdfrw --producer="" --nodate "$1" > "$2" 2>/dev/null
}
+# We have to use jpegtopnm and cannot use djpeg because the latter produces +# slightly different results as well when called like this: +# djpeg -dct int -pnm "$tempdir/normal.jpg" > "$tempdir/normal.pnm" +# An alternative way to compare the JPG would be to require a different DCT +# method when decoding by setting -define jpeg:dct-method=ifast in the +# compare command. +jpegtopnm -dct int "$tempdir/normal.jpg" > "$tempdir/normal.pnm" 2>/dev/null + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.pnm" + +pdfimages -j "$tempdir/out.pdf" "$tempdir/images" +cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg" +rm "$tempdir/images-000.jpg" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.jpg" "$tempdir/normal.pnm" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests JPEG (90° rotated)" + +convert "$tempdir/normal.png" "$tempdir/normal.jpg" +exiftool -overwrite_original -all= "$tempdir/normal.jpg" -n >/dev/null +exiftool -overwrite_original -Orientation=6 -XResolution=96 -YResolution=96 -n "$tempdir/normal.jpg" >/dev/null + +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Format: JPEG (Joint Photographic Experts Group JFIF format)$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Mime type: image/jpeg$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Depth: 8-bit$' +identify -verbose 
"$tempdir/normal.jpg" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Compression: JPEG$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:Orientation: 6$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:ResolutionUnit: 2$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:XResolution: 96/1$' +identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ exif:YResolution: 96/1$' + +img2pdf "$tempdir/normal.jpg" "$tempdir/out.pdf" + +# We have to use jpegtopnm with the original JPG before being able to compare +# it with imagemagick because imagemagick will decode the JPG slightly +# differently than ghostscript, poppler and mupdf do it. +# We have to use jpegtopnm and cannot use djpeg because the latter produces +# slightly different results as well when called like this: +# djpeg -dct int -pnm "$tempdir/normal.jpg" > "$tempdir/normal.pnm" +# An alternative way to compare the JPG would be to require a different DCT +# method when decoding by setting -define jpeg:dct-method=ifast in the +# compare command. 
+jpegtopnm -dct int "$tempdir/normal.jpg" > "$tempdir/normal.pnm" 2>/dev/null
+convert -rotate "90" "$tempdir/normal.pnm" "$tempdir/normal_rotated.png" # reference image that the rendered page is compared against
+#convert -rotate "0" "$tempdir/normal.pnm" "$tempdir/normal_rotated.png"
+
+compare_rendered "$tempdir/out.pdf" "$tempdir/normal_rotated.png"
+
+pdfimages -j "$tempdir/out.pdf" "$tempdir/images"
+cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg"
+rm "$tempdir/images-000.jpg"
+
+grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf"
+grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf"
+grep --quiet '^ /Height 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Width 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Rotate 90$' "$tempdir/out.pdf"
+
+rm "$tempdir/normal.jpg" "$tempdir/normal.pnm" "$tempdir/out.pdf" "$tempdir/normal_rotated.png"
+j=$((j+1))
+
+###############################################################################
+echo "Test $j/$tests JPEG CMYK"
+
+convert "$tempdir/normal.png" -colorspace cmyk "$tempdir/normal.jpg"
+
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Format: JPEG (Joint Photographic Experts Group JFIF format)$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Mime type: image/jpeg$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Colorspace: CMYK$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Type: ColorSeparation$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Depth: 8-bit$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.jpg" | grep --quiet '^ Compression: JPEG$'
+
+img2pdf "$tempdir/normal.jpg" "$tempdir/out.pdf"
+
+gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE=tiff32nc -r96 -sOutputFile="$tempdir/gs-%00d.tiff" "$tempdir/out.pdf"
+similar "$tempdir/normal.jpg" "$tempdir/gs-1.tiff"
+rm "$tempdir/gs-1.tiff"
+
+# not testing with poppler as it cannot write CMYK images
+
+mutool draw -o "$tempdir/mupdf.pam" -r 96 -c cmyk "$tempdir/out.pdf" 2>/dev/null # name the PDF under test explicitly; this test never assigns $pdf
+similar "$tempdir/normal.jpg" "$tempdir/mupdf.pam"
+rm "$tempdir/mupdf.pam"
+
+pdfimages -j "$tempdir/out.pdf" "$tempdir/images"
+cmp "$tempdir/normal.jpg" "$tempdir/images-000.jpg"
+rm "$tempdir/images-000.jpg"
+
+grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /ColorSpace /DeviceCMYK$' "$tempdir/out.pdf"
+grep --quiet '^ /Decode \[ 1 0 1 0 1 0 1 0 \]$' "$tempdir/out.pdf"
+grep --quiet '^ /Filter /DCTDecode$' "$tempdir/out.pdf"
+grep --quiet '^ /Height 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Width 60$' "$tempdir/out.pdf"
+
+rm "$tempdir/normal.jpg" "$tempdir/out.pdf"
+j=$((j+1))
+
+###############################################################################
+echo "Test $j/$tests JPEG2000"
+
+convert "$tempdir/normal.png" "$tempdir/normal.jp2"
+
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Format: JP2 (JPEG-2000 File Format Syntax)$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Mime type: image/jp2$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Colorspace: sRGB$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Type: TrueColor$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Depth: 8-bit$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.jp2" | grep --quiet '^ Compression: JPEG2000$'
+
+img2pdf "$tempdir/normal.jp2" "$tempdir/out.pdf"
+
+compare_rendered "$tempdir/out.pdf" "$tempdir/normal.jp2"
+
+pdfimages -jp2 "$tempdir/out.pdf" "$tempdir/images"
+cmp "$tempdir/normal.jp2" "$tempdir/images-000.jp2"
+rm "$tempdir/images-000.jp2"
+
+grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf"
+grep --quiet '^ /Filter /JPXDecode$' "$tempdir/out.pdf"
+grep --quiet '^ /Height 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Width 60$' "$tempdir/out.pdf"
+
+rm "$tempdir/normal.jp2" "$tempdir/out.pdf"
+j=$((j+1))
+
+###############################################################################
+#echo Test JPEG2000 CMYK
+#
+# cannot test because imagemagick does not support JPEG2000 CMYK
+
+###############################################################################
+echo "Test $j/$tests PNG RGB8"
+
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Mime type: image/png$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Colorspace: sRGB$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Type: TrueColor$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Depth: 8-bit$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ Compression: Zip$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.bit_depth: 8$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$'
+identify -verbose "$tempdir/normal.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$'
+
+img2pdf "$tempdir/normal.png" "$tempdir/out.pdf"
+
+compare_rendered "$tempdir/out.pdf" "$tempdir/normal.png"
+
+compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.png"
+
+grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf"
+grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf"
+grep --quiet '^ /Colors 3$' "$tempdir/out.pdf"
+grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf"
+grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf"
+grep --quiet '^ /Height 60$' "$tempdir/out.pdf"
+grep --quiet '^ /Width 60$' "$tempdir/out.pdf"
+
+rm "$tempdir/out.pdf"
+j=$((j+1))
+
+###############################################################################
+echo "Test $j/$tests PNG RGB16"
+
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Mime type: image/png$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Colorspace: sRGB$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Type: TrueColor$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Depth: 16-bit$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Page geometry: 60x60+0+0$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ Compression: Zip$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$'
+identify -verbose "$tempdir/normal16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$'
+
+img2pdf "$tempdir/normal16.png" "$tempdir/out.pdf"
+
+compare_ghostscript "$tempdir/out.pdf" "$tempdir/normal16.png" tiff48nc
+
+# poppler outputs 8-bit RGB so the comparison will not be exact
+pdftocairo -r 96 -png "$tempdir/out.pdf" "$tempdir/poppler" +similar "$tempdir/normal16.png" "$tempdir/poppler-1.png" +rm "$tempdir/poppler-1.png" + +# pdfimages is unable to write 16 bit output + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG RGBA8" + +convert "$tempdir/alpha.png" -depth 8 -strip "$tempdir/alpha8.png" + +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.color-type-orig: 6$' +identify -verbose "$tempdir/alpha8.png" | grep --quiet '^ png:IHDR.color_type: 6 (RGBA)$' +identify -verbose "$tempdir/alpha8.png" | grep 
--quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha8.png" /dev/null && rc=$? || rc=$? +if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha8.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG RGBA16" + +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.color-type-orig: 6$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.color_type: 6 (RGBA)$' +identify -verbose "$tempdir/alpha.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG Gray8 Alpha" + +convert "$tempdir/alpha.png" -colorspace Gray -dither FloydSteinberg -colors 256 -depth 8 -strip "$tempdir/alpha_gray8.png" + +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Type: GrayscaleAlpha$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.color-type-orig: 4$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.color_type: 4 (GrayAlpha)$' +identify -verbose "$tempdir/alpha_gray8.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha_gray8.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha_gray8.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG Gray16 Alpha" + +convert "$tempdir/alpha.png" -colorspace Gray -depth 16 -strip "$tempdir/alpha_gray16.png" + +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Type: GrayscaleAlpha$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.color-type-orig: 4$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.color_type: 4 (GrayAlpha)$' +identify -verbose "$tempdir/alpha_gray16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/alpha_gray16.png" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha_gray16.png" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests PNG interlaced" + +convert "$tempdir/normal.png" -interlace PNG -strip "$tempdir/interlace.png" + +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 8$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.bit_depth: 8$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.color-type-orig: 2$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.color_type: 2 (Truecolor)$' +identify -verbose "$tempdir/interlace.png" | grep --quiet '^ png:IHDR.interlace_method: 1 (Adam7 method)$' + +img2pdf "$tempdir/interlace.png" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.png" + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' 
"$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/interlace.png" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests PNG Gray$i" + + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Mime type: image/png$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Colorspace: Gray$' + if [ "$i" -eq 1 ]; then + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Type: Bilevel$' + else + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Type: Grayscale$' + fi + if [ "$i" -eq 8 ]; then + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ Depth: 8-bit$" + else + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ Depth: 8/$i-bit$" + fi + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ png:IHDR.bit-depth-orig: $i$" + identify -verbose "$tempdir/gray$i.png" | grep --quiet "^ png:IHDR.bit_depth: $i$" + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.color-type-orig: 0$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.color_type: 0 (Grayscale)$' + identify -verbose "$tempdir/gray$i.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + + img2pdf "$tempdir/gray$i.png" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/gray$i.png" pnggray + + compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray$i.png" + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' 
"$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests PNG Gray16" + +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Mime type: image/png$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Type: Grayscale$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.bit-depth-orig: 16$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.bit_depth: 16$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.color-type-orig: 0$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.color_type: 0 (Grayscale)$' +identify -verbose "$tempdir/gray16.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + +img2pdf "$tempdir/gray16.png" "$tempdir/out.pdf" + +# ghostscript outputs 8-bit grayscale, so the comparison will not be exact +gs -dQUIET -dNOPAUSE -dBATCH -sDEVICE=pnggray -r96 -sOutputFile="$tempdir/gs-%00d.png" "$tempdir/out.pdf" 
+similar "$tempdir/gray16.png" "$tempdir/gs-1.png" +rm "$tempdir/gs-1.png" + +# poppler outputs 8-bit grayscale so the comparison will not be exact +pdftocairo -r 96 -png "$tempdir/out.pdf" "$tempdir/poppler" +similar "$tempdir/gray16.png" "$tempdir/poppler-1.png" +rm "$tempdir/poppler-1.png" + +# pdfimages outputs 8-bit grayscale so the comparison will not be exact +pdfimages -png "$tempdir/out.pdf" "$tempdir/images" +similar "$tempdir/gray16.png" "$tempdir/images-000.png" +rm "$tempdir/images-000.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 16$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests PNG Palette$i" + + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Format: PNG (Portable Network Graphics)$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Mime type: image/png$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Depth: 8-bit$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet "^ png:IHDR.bit-depth-orig: $i$" + identify 
-verbose "$tempdir/palette$i.png" | grep --quiet "^ png:IHDR.bit_depth: $i$" + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.color-type-orig: 3$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.color_type: 3 (Indexed)$' + identify -verbose "$tempdir/palette$i.png" | grep --quiet '^ png:IHDR.interlace_method: 0 (Not interlaced)$' + + img2pdf "$tempdir/palette$i.png" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests GIF transparent" + +convert "$tempdir/alpha.png" "$tempdir/alpha.gif" + +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Type: PaletteAlpha$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/alpha.gif" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify 
-verbose "$tempdir/alpha.gif" | grep --quiet '^ Compression: LZW$' + +img2pdf "$tempdir/alpha.gif" /dev/null && rc=$? || rc=$? +if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha.gif" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests GIF Palette$i" + + convert "$tempdir/palette$i.png" "$tempdir/palette$i.gif" + + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Mime type: image/gif$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Depth: 8-bit$' + case $i in + 1) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 2$';; + 2) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 4$';; + 4) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 16$';; + 8) identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Colormap entries: 256$';; + esac + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.gif" | grep --quiet '^ Compression: LZW$' + + img2pdf "$tempdir/palette$i.gif" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 
1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" "$tempdir/palette$i.gif" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests GIF animation" + +convert "$tempdir/normal.png" "$tempdir/inverse.png" -strip "$tempdir/animation.gif" + +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Type: Palette$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[0]" | grep --quiet '^ Compression: LZW$' + +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Format: GIF (CompuServe graphics interchange format)$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Mime type: image/gif$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Type: Palette$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Colormap entries: 256$' +identify -verbose "$tempdir/animation.gif[1]" | 
grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Compression: LZW$' +identify -verbose "$tempdir/animation.gif[1]" | grep --quiet '^ Scene: 1$' + +img2pdf "$tempdir/animation.gif" "$tempdir/out.pdf" + +if [ "$(pdfinfo "$tempdir/out.pdf" | awk '/Pages:/ {print $2}')" != 2 ]; then + echo "pdf does not have 2 pages" + exit 1 +fi + +pdfseparate "$tempdir/out.pdf" "$tempdir/page-%d.pdf" +rm "$tempdir/out.pdf" + +for page in 1 2; do + compare_rendered "$tempdir/page-$page.pdf" "$tempdir/animation.gif[$((page-1))]" + + # pdfimages cannot export palette based images + + # We cannot grep the PDF metadata here, because the page was + # rewritten into a non-greppable format by pdfseparate. but that's + # okay, because we already grepped single pages before and multipage + # PDF should not be different. + + rm "$tempdir/page-$page.pdf" +done + +rm "$tempdir/animation.gif" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF float" + +convert "$tempdir/normal.png" -depth 32 -define quantum:format=floating-point "$tempdir/float.tiff" + +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Depth: 32/8-bit$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ quantum:format: floating-point$' 
+identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/float.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/float.tiff" /dev/null && rc=$? || rc=$? +if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/float.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CMYK8" + +convert "$tempdir/normal.png" -colorspace cmyk "$tempdir/cmyk8.tiff" + +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Colorspace: CMYK$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Type: ColorSeparation$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/cmyk8.tiff" | grep --quiet '^ tiff:photometric: separated$' + +img2pdf "$tempdir/cmyk8.tiff" "$tempdir/out.pdf" + +compare_ghostscript "$tempdir/out.pdf" "$tempdir/cmyk8.tiff" tiff32nc + +# not testing with poppler as it cannot write CMYK images; mutool renders out.pdf straight to CMYK below + +mutool draw -o "$tempdir/mupdf.pam" -r 96 -c cmyk "$tempdir/out.pdf" 2>/dev/null +compare -metric AE "$tempdir/cmyk8.tiff" "$tempdir/mupdf.pam" null: 2>/dev/null +rm "$tempdir/mupdf.pam" + +pdfimages -tiff "$tempdir/out.pdf"
"$tempdir/images" +compare -metric AE "$tempdir/cmyk8.tiff" "$tempdir/images-000.tif" null: 2>/dev/null +rm "$tempdir/images-000.tif" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceCMYK$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/cmyk8.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CMYK16" + +convert "$tempdir/normal.png" -depth 16 -colorspace cmyk "$tempdir/cmyk16.tiff" + +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Colorspace: CMYK$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Type: ColorSeparation$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/cmyk16.tiff" | grep --quiet '^ tiff:photometric: separated$' + +# PIL is unable to read 16 bit CMYK images, so img2pdf must fail on the TIFF created above +img2pdf "$tempdir/cmyk16.tiff" /dev/null && rc=$? || rc=$?
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/cmyk16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGB8" + +convert "$tempdir/normal.png" "$tempdir/normal.tiff" + +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/normal.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/normal.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/normal.tiff" tiff24nc + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/normal.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceRGB$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" +grep --quiet '^ /Colors 3$' "$tempdir/out.pdf" +grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" +grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/normal.tiff" 
"$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGBA8" + +convert "$tempdir/alpha.png" -depth 8 -strip "$tempdir/alpha8.tiff" + +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:alpha: unassociated$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/alpha8.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/alpha8.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha8.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF RGBA16" + +convert "$tempdir/alpha.png" -strip "$tempdir/alpha16.tiff" + +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Type: TrueColorAlpha$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Depth: 16-bit$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:alpha: unassociated$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/alpha16.tiff" | grep --quiet '^ tiff:photometric: RGB$' + +img2pdf "$tempdir/alpha16.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/alpha16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF Gray1" + +convert "$tempdir/gray1.png" -depth 1 "$tempdir/gray1.tiff" + +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Type: Bilevel$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Depth: 1-bit$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/gray1.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + +img2pdf "$tempdir/gray1.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/gray1.png" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray1.png" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 true$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' 
"$tempdir/out.pdf" + +rm "$tempdir/gray1.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +for i in 2 4 8; do + echo "Test $j/$tests TIFF Gray$i" + + convert "$tempdir/gray$i.png" -depth $i "$tempdir/gray$i.tiff" + + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Colorspace: Gray$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Type: Grayscale$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Endianess: LSB$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet "^ Depth: $i-bit$" + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/gray$i.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + + img2pdf "$tempdir/gray$i.tiff" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/gray$i.png" pnggray + + compare_pdfimages "$tempdir/out.pdf" "$tempdir/gray$i.png" + + # When saving a PNG, PIL will store it as 8-bit data + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent 8$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' 
"$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/gray$i.tiff" "$tempdir/out.pdf" + j=$((j+1)) +done + +################################################################################ +echo "Test $j/$tests TIFF Gray16" + +convert "$tempdir/gray16.png" -depth 16 "$tempdir/gray16.tiff" + +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Colorspace: Gray$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Type: Grayscale$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet "^ Depth: 16-bit$" +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/gray16.tiff" | grep --quiet '^ tiff:photometric: min-is-black$' + +img2pdf "$tempdir/gray16.tiff" /dev/null && rc=$? || rc=$? 
+if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 +fi + +rm "$tempdir/gray16.tiff" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF multipage" + +convert "$tempdir/normal.png" "$tempdir/inverse.png" -strip "$tempdir/multipage.tiff" + +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Depth: 8-bit$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/multipage.tiff[0]" | grep --quiet '^ tiff:photometric: RGB$' + +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Mime type: image/tiff$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Colorspace: sRGB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Type: TrueColor$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Endianess: LSB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Depth: 
8-bit$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Page geometry: 60x60+0+0$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Compression: Zip$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:alpha: unspecified$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:endian: lsb$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ tiff:photometric: RGB$' +identify -verbose "$tempdir/multipage.tiff[1]" | grep --quiet '^ Scene: 1$' + +img2pdf "$tempdir/multipage.tiff" "$tempdir/out.pdf" + +if [ "$(pdfinfo "$tempdir/out.pdf" | awk '/Pages:/ {print $2}')" != 2 ]; then + echo "pdf does not have 2 pages" + exit 1 +fi + +pdfseparate "$tempdir/out.pdf" "$tempdir/page-%d.pdf" +rm "$tempdir/out.pdf" + +for page in 1 2; do + compare_rendered "$tempdir/page-$page.pdf" "$tempdir/multipage.tiff[$((page-1))]" + + compare_pdfimages "$tempdir/page-$page.pdf" "$tempdir/multipage.tiff[$((page-1))]" + + # We cannot grep the PDF metadata here, because the page was + # rewritten into a non-greppable format by pdfseparate. but that's + # okay, because we already grepped single pages before and multipage + # PDF should not be different. 
+ + rm "$tempdir/page-$page.pdf" +done + +rm "$tempdir/multipage.tiff" +j=$((j+1)) + +############################################################################### +for i in 1 2 4 8; do + echo "Test $j/$tests TIFF Palette$i" + + convert "$tempdir/palette$i.png" "$tempdir/palette$i.tiff" + + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Type: Palette$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Endianess: LSB$' + if [ "$i" -eq 8 ]; then + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet "^ Depth: 8-bit$" + else + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet "^ Depth: $i/8-bit$" + fi + case $i in + 1) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 2$';; + 2) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 4$';; + 4) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 16$';; + 8) identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Colormap entries: 256$';; + esac + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/palette$i.tiff" | grep --quiet '^ tiff:photometric: palette$' + + img2pdf "$tempdir/palette$i.tiff" "$tempdir/out.pdf" + + compare_rendered "$tempdir/out.pdf" "$tempdir/palette$i.png" + + # pdfimages cannot export 
palette based images + + grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /ColorSpace \[ /Indexed /DeviceRGB ' "$tempdir/out.pdf" + grep --quiet '^ /BitsPerComponent '"$i"'$' "$tempdir/out.pdf" + grep --quiet '^ /Colors 1$' "$tempdir/out.pdf" + grep --quiet '^ /Predictor 15$' "$tempdir/out.pdf" + grep --quiet '^ /Filter /FlateDecode$' "$tempdir/out.pdf" + grep --quiet '^ /Height 60$' "$tempdir/out.pdf" + grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + + rm "$tempdir/out.pdf" + + rm "$tempdir/palette$i.tiff" + j=$((j+1)) +done + +############################################################################### +for i in 12 14 16; do + echo "Test $j/$tests TIFF RGB$i" + + convert "$tempdir/normal16.png" -depth "$i" "$tempdir/normal$i.tiff" + + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Format: TIFF (Tagged Image File Format)$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Mime type: image/tiff$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Geometry: 60x60+0+0$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Colorspace: sRGB$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Type: TrueColor$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Endianess: LSB$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet "^ Depth: $i-bit$" + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Page geometry: 60x60+0+0$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ Compression: Zip$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:alpha: unspecified$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:endian: lsb$' + identify -verbose "$tempdir/normal$i.tiff" | grep --quiet '^ tiff:photometric: RGB$' + + img2pdf "$tempdir/normal$i.tiff" /dev/null && rc=$? || rc=$? 
+ if [ "$rc" -eq 0 ]; then + echo needs to fail here + exit 1 + fi + + rm "$tempdir/normal$i.tiff" + j=$((j+1)) +done + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, little endian, msb-to-lsb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' 
"$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, big endian, msb-to-lsb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=msb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: MSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: msb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep 
--quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, big endian, lsb-to-msb, min-is-white" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=msb -define tiff:fill-order=lsb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: lsb-to-msb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: MSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: msb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep 
--quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, little endian, msb-to-lsb, min-is-black" + +# We create a min-is-black group4 tiff with PIL because it creates these by +# default (and without the option to do otherwise) whereas imagemagick only +# became able to do it through commit 00730551f0a34328685c59d0dde87dd9e366103a +# See https://www.imagemagick.org/discourse-server/viewtopic.php?f=1&t=34605 +python3 -c 'from PIL import Image;Image.open("'"$tempdir/gray1.png"'").save("'"$tempdir/group4.tiff"'",format="TIFF",compression="group4")' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-black' +# PIL doesn't set those +#tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +#tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-black' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" 
"$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 true$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, without fillorder, samples/pixel, bits/sample" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white "$tempdir/group4.tiff" +# remove BitsPerSample (258) +tiffset -u 258 "$tempdir/group4.tiff" +# remove FillOrder (266) +tiffset -u 266 "$tempdir/group4.tiff" +# remove SamplesPerPixel (277) +tiffset -u 277 "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' && exit 1 +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' && exit 1 +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' && exit 1 +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | 
grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +############################################################################### +echo "Test $j/$tests TIFF CCITT Group4, without rows-per-strip" + +convert "$tempdir/gray1.png" -compress group4 -define tiff:endian=lsb -define tiff:fill-order=msb -define quantum:polarity=min-is-white -define tiff:rows-per-strip=4294967295 "$tempdir/group4.tiff" +# remove RowsPerStrip (278) +tiffset -u 278 "$tempdir/group4.tiff" +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Bits/Sample: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Compression Scheme: CCITT Group 4' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Photometric Interpretation: min-is-white' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'FillOrder: msb-to-lsb' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Samples/Pixel: 1' +tiffinfo "$tempdir/group4.tiff" | grep --quiet 'Rows/Strip:' && exit 1 +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Type: Bilevel' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Endianess: LSB' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 
'Depth: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'gray: 1-bit' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'Compression: Group4' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:endian: lsb' +identify -verbose "$tempdir/group4.tiff" | grep --quiet 'tiff:photometric: min-is-white' + +img2pdf "$tempdir/group4.tiff" "$tempdir/out.pdf" + +compare_rendered "$tempdir/out.pdf" "$tempdir/group4.tiff" pnggray + +compare_pdfimages "$tempdir/out.pdf" "$tempdir/group4.tiff" + +grep --quiet '^45.0000 0 0 45.0000 0.0000 0.0000 cm$' "$tempdir/out.pdf" +grep --quiet '^ /BitsPerComponent 1$' "$tempdir/out.pdf" +grep --quiet '^ /ColorSpace /DeviceGray$' "$tempdir/out.pdf" +grep --quiet '^ /BlackIs1 false$' "$tempdir/out.pdf" +grep --quiet '^ /Columns 60$' "$tempdir/out.pdf" +grep --quiet '^ /K -1$' "$tempdir/out.pdf" +grep --quiet '^ /Rows 60$' "$tempdir/out.pdf" +grep --quiet '^ /Filter \[ /CCITTFaxDecode \]$' "$tempdir/out.pdf" +grep --quiet '^ /Height 60$' "$tempdir/out.pdf" +grep --quiet '^ /Width 60$' "$tempdir/out.pdf" + +rm "$tempdir/group4.tiff" "$tempdir/out.pdf" +j=$((j+1)) + +rm "$tempdir/alpha.png" "$tempdir/normal.png" "$tempdir/inverse.png" "$tempdir/palette1.png" "$tempdir/palette2.png" "$tempdir/palette4.png" "$tempdir/palette8.png" "$tempdir/gray8.png" "$tempdir/normal16.png" "$tempdir/gray16.png" "$tempdir/gray4.png" "$tempdir/gray2.png" "$tempdir/gray1.png" +rmdir "$tempdir" + +trap - EXIT diff --git a/test_comp.sh b/test_comp.sh new file mode 100755 index 0000000..44edefd --- /dev/null +++ b/test_comp.sh @@ -0,0 +1,32 @@ +#!/bin/sh + +if [ $# -ne 1 ]; then + echo "usage: $0 image" + exit +fi + +echo "converting image to pdf, trying all compressions imagemagick has to offer" +echo "if, as a result, Zip/FlateDecode should NOT be the lossless compression with the lowest size ratio, contact me j [dot] schauer [at] email [dot] de" +echo "also, send me the image in question" +echo + +imsize=`stat -c "%s" "$1"` + +for a 
in `convert -list compress`; do + echo "encode:\t$a" + convert "$1" -compress $a "`basename $1 .jpg`.pdf" + pdfimages "`basename $1 .jpg`.pdf" "`basename $1 .jpg`" + printf "diff:\t" + diff=`compare -metric AE "$1" "\`basename $1 .jpg\`-000.ppm" null: 2>&1` + if [ "$diff" != "0" ]; then + echo "lossy" + else + echo "lossless" + fi + printf "size:\t" + pdfsize=`stat -c "%s" "\`basename $1 .jpg\`.pdf"` + echo "scale=1;$pdfsize/$imsize" | bc + printf "pdf:\t" + grep --max-count=1 --text /Filter "`basename $1 .jpg`.pdf" + echo +done -- cgit v1.2.3