summaryrefslogtreecommitdiff
path: root/src/img2pdf.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/img2pdf.py')
-rwxr-xr-xsrc/img2pdf.py2697
1 files changed, 2697 insertions, 0 deletions
diff --git a/src/img2pdf.py b/src/img2pdf.py
new file mode 100755
index 0000000..27e5b8c
--- /dev/null
+++ b/src/img2pdf.py
@@ -0,0 +1,2697 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright (C) 2012-2014 Johannes 'josch' Schauer <j.schauer at email.de>
+#
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation, either
+# version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this program. If not, see
+# <http://www.gnu.org/licenses/>.
+
+import sys
+import os
+import zlib
+import argparse
+from PIL import Image, TiffImagePlugin
+
+# TiffImagePlugin.DEBUG = True
+from PIL.ExifTags import TAGS
+from datetime import datetime
+from jp2 import parsejp2
+from enum import Enum
+from io import BytesIO
+import logging
+import struct
+
+PY3 = sys.version_info[0] >= 3
+
+__version__ = "0.3.3"
+default_dpi = 96.0
+papersizes = {
+ "letter": "8.5inx11in",
+ "a0": "841mmx1189mm",
+ "a1": "594mmx841mm",
+ "a2": "420mmx594mm",
+ "a3": "297mmx420mm",
+ "a4": "210mmx297mm",
+ "a5": "148mmx210mm",
+ "a6": "105mmx148mm",
+ "legal": "8.5inx14in",
+ "tabloid": "11inx17in",
+}
+papernames = {
+ "letter": "Letter",
+ "a0": "A0",
+ "a1": "A1",
+ "a2": "A2",
+ "a3": "A3",
+ "a4": "A4",
+ "a5": "A5",
+ "a6": "A6",
+ "legal": "Legal",
+ "tabloid": "Tabloid",
+}
+
+
+FitMode = Enum("FitMode", "into fill exact shrink enlarge")
+
+PageOrientation = Enum("PageOrientation", "portrait landscape")
+
+Colorspace = Enum("Colorspace", "RGB L 1 CMYK CMYK;I RGBA P other")
+
+ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG TIFF other")
+
+PageMode = Enum("PageMode", "none outlines thumbs")
+
+PageLayout = Enum("PageLayout", "single onecolumn twocolumnright twocolumnleft")
+
+Magnification = Enum("Magnification", "fit fith fitbh")
+
+ImgSize = Enum("ImgSize", "abs perc dpi")
+
+Unit = Enum("Unit", "pt cm mm inch")
+
+ImgUnit = Enum("ImgUnit", "pt cm mm inch perc dpi")
+
+TIFFBitRevTable = [
+ 0x00,
+ 0x80,
+ 0x40,
+ 0xC0,
+ 0x20,
+ 0xA0,
+ 0x60,
+ 0xE0,
+ 0x10,
+ 0x90,
+ 0x50,
+ 0xD0,
+ 0x30,
+ 0xB0,
+ 0x70,
+ 0xF0,
+ 0x08,
+ 0x88,
+ 0x48,
+ 0xC8,
+ 0x28,
+ 0xA8,
+ 0x68,
+ 0xE8,
+ 0x18,
+ 0x98,
+ 0x58,
+ 0xD8,
+ 0x38,
+ 0xB8,
+ 0x78,
+ 0xF8,
+ 0x04,
+ 0x84,
+ 0x44,
+ 0xC4,
+ 0x24,
+ 0xA4,
+ 0x64,
+ 0xE4,
+ 0x14,
+ 0x94,
+ 0x54,
+ 0xD4,
+ 0x34,
+ 0xB4,
+ 0x74,
+ 0xF4,
+ 0x0C,
+ 0x8C,
+ 0x4C,
+ 0xCC,
+ 0x2C,
+ 0xAC,
+ 0x6C,
+ 0xEC,
+ 0x1C,
+ 0x9C,
+ 0x5C,
+ 0xDC,
+ 0x3C,
+ 0xBC,
+ 0x7C,
+ 0xFC,
+ 0x02,
+ 0x82,
+ 0x42,
+ 0xC2,
+ 0x22,
+ 0xA2,
+ 0x62,
+ 0xE2,
+ 0x12,
+ 0x92,
+ 0x52,
+ 0xD2,
+ 0x32,
+ 0xB2,
+ 0x72,
+ 0xF2,
+ 0x0A,
+ 0x8A,
+ 0x4A,
+ 0xCA,
+ 0x2A,
+ 0xAA,
+ 0x6A,
+ 0xEA,
+ 0x1A,
+ 0x9A,
+ 0x5A,
+ 0xDA,
+ 0x3A,
+ 0xBA,
+ 0x7A,
+ 0xFA,
+ 0x06,
+ 0x86,
+ 0x46,
+ 0xC6,
+ 0x26,
+ 0xA6,
+ 0x66,
+ 0xE6,
+ 0x16,
+ 0x96,
+ 0x56,
+ 0xD6,
+ 0x36,
+ 0xB6,
+ 0x76,
+ 0xF6,
+ 0x0E,
+ 0x8E,
+ 0x4E,
+ 0xCE,
+ 0x2E,
+ 0xAE,
+ 0x6E,
+ 0xEE,
+ 0x1E,
+ 0x9E,
+ 0x5E,
+ 0xDE,
+ 0x3E,
+ 0xBE,
+ 0x7E,
+ 0xFE,
+ 0x01,
+ 0x81,
+ 0x41,
+ 0xC1,
+ 0x21,
+ 0xA1,
+ 0x61,
+ 0xE1,
+ 0x11,
+ 0x91,
+ 0x51,
+ 0xD1,
+ 0x31,
+ 0xB1,
+ 0x71,
+ 0xF1,
+ 0x09,
+ 0x89,
+ 0x49,
+ 0xC9,
+ 0x29,
+ 0xA9,
+ 0x69,
+ 0xE9,
+ 0x19,
+ 0x99,
+ 0x59,
+ 0xD9,
+ 0x39,
+ 0xB9,
+ 0x79,
+ 0xF9,
+ 0x05,
+ 0x85,
+ 0x45,
+ 0xC5,
+ 0x25,
+ 0xA5,
+ 0x65,
+ 0xE5,
+ 0x15,
+ 0x95,
+ 0x55,
+ 0xD5,
+ 0x35,
+ 0xB5,
+ 0x75,
+ 0xF5,
+ 0x0D,
+ 0x8D,
+ 0x4D,
+ 0xCD,
+ 0x2D,
+ 0xAD,
+ 0x6D,
+ 0xED,
+ 0x1D,
+ 0x9D,
+ 0x5D,
+ 0xDD,
+ 0x3D,
+ 0xBD,
+ 0x7D,
+ 0xFD,
+ 0x03,
+ 0x83,
+ 0x43,
+ 0xC3,
+ 0x23,
+ 0xA3,
+ 0x63,
+ 0xE3,
+ 0x13,
+ 0x93,
+ 0x53,
+ 0xD3,
+ 0x33,
+ 0xB3,
+ 0x73,
+ 0xF3,
+ 0x0B,
+ 0x8B,
+ 0x4B,
+ 0xCB,
+ 0x2B,
+ 0xAB,
+ 0x6B,
+ 0xEB,
+ 0x1B,
+ 0x9B,
+ 0x5B,
+ 0xDB,
+ 0x3B,
+ 0xBB,
+ 0x7B,
+ 0xFB,
+ 0x07,
+ 0x87,
+ 0x47,
+ 0xC7,
+ 0x27,
+ 0xA7,
+ 0x67,
+ 0xE7,
+ 0x17,
+ 0x97,
+ 0x57,
+ 0xD7,
+ 0x37,
+ 0xB7,
+ 0x77,
+ 0xF7,
+ 0x0F,
+ 0x8F,
+ 0x4F,
+ 0xCF,
+ 0x2F,
+ 0xAF,
+ 0x6F,
+ 0xEF,
+ 0x1F,
+ 0x9F,
+ 0x5F,
+ 0xDF,
+ 0x3F,
+ 0xBF,
+ 0x7F,
+ 0xFF,
+]
+
+
+class NegativeDimensionError(Exception):
+ pass
+
+
+class UnsupportedColorspaceError(Exception):
+ pass
+
+
+class ImageOpenError(Exception):
+ pass
+
+
+class JpegColorspaceError(Exception):
+ pass
+
+
+class PdfTooLargeError(Exception):
+ pass
+
+
+# without pdfrw this function is a no-op
+def my_convert_load(string):
+ return string
+
+
+def parse(cont, indent=1):
+ if type(cont) is dict:
+ return (
+ b"<<\n"
+ + b"\n".join(
+ [
+ 4 * indent * b" " + k + b" " + parse(v, indent + 1)
+ for k, v in sorted(cont.items())
+ ]
+ )
+ + b"\n"
+ + 4 * (indent - 1) * b" "
+ + b">>"
+ )
+ elif type(cont) is int:
+ return str(cont).encode()
+ elif type(cont) is float:
+ if int(cont) == cont:
+ return parse(int(cont))
+ else:
+ return ("%0.4f" % cont).rstrip("0").encode()
+ elif isinstance(cont, MyPdfDict):
+ # if cont got an identifier, then addobj() has been called with it
+ # and a link to it will be added, otherwise add it inline
+ if hasattr(cont, "identifier"):
+ return ("%d 0 R" % cont.identifier).encode()
+ else:
+ return parse(cont.content, indent)
+ elif type(cont) is str or isinstance(cont, bytes):
+ if type(cont) is str and type(cont) is not bytes:
+ raise TypeError(
+ "parse must be passed a bytes object in py3. Got: %s" % cont
+ )
+ return cont
+ elif isinstance(cont, list):
+ return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]"
+ else:
+ raise TypeError("cannot handle type %s with content %s" % (type(cont), cont))
+
+
+class MyPdfDict(object):
+ def __init__(self, *args, **kw):
+ self.content = dict()
+ if args:
+ if len(args) == 1:
+ args = args[0]
+ self.content.update(args)
+ self.stream = None
+ for key, value in kw.items():
+ if key == "stream":
+ self.stream = value
+ self.content[MyPdfName.Length] = len(value)
+ elif key == "indirect":
+ pass
+ else:
+ self.content[getattr(MyPdfName, key)] = value
+
+ def tostring(self):
+ if self.stream is not None:
+ return (
+ ("%d 0 obj\n" % self.identifier).encode()
+ + parse(self.content)
+ + b"\nstream\n"
+ + self.stream
+ + b"\nendstream\nendobj\n"
+ )
+ else:
+ return (
+ ("%d 0 obj\n" % self.identifier).encode()
+ + parse(self.content)
+ + b"\nendobj\n"
+ )
+
+ def __setitem__(self, key, value):
+ self.content[key] = value
+
+ def __getitem__(self, key):
+ return self.content[key]
+
+
+class MyPdfName:
+ def __getattr__(self, name):
+ return b"/" + name.encode("ascii")
+
+
+MyPdfName = MyPdfName()
+
+
+class MyPdfObject(bytes):
+ def __new__(cls, string):
+ return bytes.__new__(cls, string.encode("ascii"))
+
+
+class MyPdfArray(list):
+ pass
+
+
+class MyPdfWriter:
+ def __init__(self, version="1.3"):
+ self.objects = []
+ # create an incomplete pages object so that a /Parent entry can be
+ # added to each page
+ self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0)
+ self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog)
+ self.version = version # default pdf version 1.3
+ self.pagearray = []
+
+ def addobj(self, obj):
+ newid = len(self.objects) + 1
+ obj.identifier = newid
+ self.objects.append(obj)
+
+ def tostream(self, info, stream):
+ xreftable = list()
+
+ # justification of the random binary garbage in the header from
+ # adobe:
+ #
+ # > Note: If a PDF file contains binary data, as most do (see Section
+ # > 3.1, “Lexical Conventions”), it is recommended that the header
+ # > line be immediately followed by a comment line containing at
+ # > least four binary characters—that is, characters whose codes are
+ # > 128 or greater. This ensures proper behavior of file transfer
+ # > applications that inspect data near the beginning of a file to
+ # > determine whether to treat the file’s contents as text or as
+ # > binary.
+ #
+ # the choice of binary characters is arbitrary but those four seem to
+ # be used elsewhere.
+ pdfheader = ("%%PDF-%s\n" % self.version).encode("ascii")
+ pdfheader += b"%\xe2\xe3\xcf\xd3\n"
+ stream.write(pdfheader)
+
+ # From section 3.4.3 of the PDF Reference (version 1.7):
+ #
+ # > Each entry is exactly 20 bytes long, including the end-of-line
+ # > marker.
+ # >
+ # > [...]
+ # >
+ # > The format of an in-use entry is
+ # > nnnnnnnnnn ggggg n eol
+ # > where
+ # > nnnnnnnnnn is a 10-digit byte offset
+ # > ggggg is a 5-digit generation number
+ # > n is a literal keyword identifying this as an in-use entry
+ # > eol is a 2-character end-of-line sequence
+ # >
+ # > [...]
+ # >
+ # > If the file’s end-of-line marker is a single character (either a
+ # > carriage return or a line feed), it is preceded by a single space;
+ #
+ # Since we chose to use a single character eol marker, we precede it by
+ # a space
+ pos = len(pdfheader)
+ xreftable.append(b"0000000000 65535 f \n")
+ for o in self.objects:
+ xreftable.append(("%010d 00000 n \n" % pos).encode())
+ content = o.tostring()
+ stream.write(content)
+ pos += len(content)
+
+ xrefoffset = pos
+ stream.write(b"xref\n")
+ stream.write(("0 %d\n" % len(xreftable)).encode())
+ for x in xreftable:
+ stream.write(x)
+ stream.write(b"trailer\n")
+ stream.write(
+ parse({b"/Size": len(xreftable), b"/Info": info, b"/Root": self.catalog})
+ + b"\n"
+ )
+ stream.write(b"startxref\n")
+ stream.write(("%d\n" % xrefoffset).encode())
+ stream.write(b"%%EOF\n")
+ return
+
+ def addpage(self, page):
+ page[b"/Parent"] = self.pages
+ self.pagearray.append(page)
+ self.pages.content[b"/Kids"].append(page)
+ self.pages.content[b"/Count"] += 1
+ self.addobj(page)
+
+
+if PY3:
+
+ class MyPdfString:
+ @classmethod
+ def encode(cls, string, hextype=False):
+ if hextype:
+ return (
+ b"< "
+ + b" ".join(("%06x" % c).encode("ascii") for c in string)
+ + b" >"
+ )
+ else:
+ try:
+ string = string.encode("ascii")
+ except UnicodeEncodeError:
+ string = b"\xfe\xff" + string.encode("utf-16-be")
+ # We should probably encode more here because at least
+ # ghostscript interpretes a carriage return byte (0x0D) as a
+ # new line byte (0x0A)
+ # PDF supports: \n, \r, \t, \b and \f
+ string = string.replace(b"\\", b"\\\\")
+ string = string.replace(b"(", b"\\(")
+ string = string.replace(b")", b"\\)")
+ return b"(" + string + b")"
+
+
+else:
+
+ class MyPdfString(object):
+ @classmethod
+ def encode(cls, string, hextype=False):
+ if hextype:
+ return (
+ b"< "
+ + b" ".join(("%06x" % c).encode("ascii") for c in string)
+ + b" >"
+ )
+ else:
+ # This mimics exactely to what pdfrw does.
+ string = string.replace(b"\\", b"\\\\")
+ string = string.replace(b"(", b"\\(")
+ string = string.replace(b")", b"\\)")
+ return b"(" + string + b")"
+
+
+class pdfdoc(object):
+ def __init__(
+ self,
+ version="1.3",
+ title=None,
+ author=None,
+ creator=None,
+ producer=None,
+ creationdate=None,
+ moddate=None,
+ subject=None,
+ keywords=None,
+ nodate=False,
+ panes=None,
+ initial_page=None,
+ magnification=None,
+ page_layout=None,
+ fit_window=False,
+ center_window=False,
+ fullscreen=False,
+ with_pdfrw=True,
+ ):
+ if with_pdfrw:
+ try:
+ from pdfrw import PdfWriter, PdfDict, PdfName, PdfString
+
+ self.with_pdfrw = True
+ except ImportError:
+ PdfWriter = MyPdfWriter
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfString = MyPdfString
+ self.with_pdfrw = False
+ else:
+ PdfWriter = MyPdfWriter
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfString = MyPdfString
+ self.with_pdfrw = False
+
+ now = datetime.now()
+ self.info = PdfDict(indirect=True)
+
+ def datetime_to_pdfdate(dt):
+ return dt.strftime("%Y%m%d%H%M%SZ")
+
+ if title is not None:
+ self.info[PdfName.Title] = PdfString.encode(title)
+ if author is not None:
+ self.info[PdfName.Author] = PdfString.encode(author)
+ if creator is not None:
+ self.info[PdfName.Creator] = PdfString.encode(creator)
+ if producer is not None and producer != "":
+ self.info[PdfName.Producer] = PdfString.encode(producer)
+ if creationdate is not None:
+ self.info[PdfName.CreationDate] = PdfString.encode(
+ "D:" + datetime_to_pdfdate(creationdate)
+ )
+ elif not nodate:
+ self.info[PdfName.CreationDate] = PdfString.encode(
+ "D:" + datetime_to_pdfdate(now)
+ )
+ if moddate is not None:
+ self.info[PdfName.ModDate] = PdfString.encode(
+ "D:" + datetime_to_pdfdate(moddate)
+ )
+ elif not nodate:
+ self.info[PdfName.ModDate] = PdfString.encode(
+ "D:" + datetime_to_pdfdate(now)
+ )
+ if subject is not None:
+ self.info[PdfName.Subject] = PdfString.encode(subject)
+ if keywords is not None:
+ self.info[PdfName.Keywords] = PdfString.encode(",".join(keywords))
+
+ self.writer = PdfWriter()
+ self.writer.version = version
+ # this is done because pdfrw adds info, catalog and pages as the first
+ # three objects in this order
+ if not self.with_pdfrw:
+ self.writer.addobj(self.info)
+ self.writer.addobj(self.writer.catalog)
+ self.writer.addobj(self.writer.pages)
+
+ self.panes = panes
+ self.initial_page = initial_page
+ self.magnification = magnification
+ self.page_layout = page_layout
+ self.fit_window = fit_window
+ self.center_window = center_window
+ self.fullscreen = fullscreen
+
+ def add_imagepage(
+ self,
+ color,
+ imgwidthpx,
+ imgheightpx,
+ imgformat,
+ imgdata,
+ imgwidthpdf,
+ imgheightpdf,
+ imgxpdf,
+ imgypdf,
+ pagewidth,
+ pageheight,
+ userunit=None,
+ palette=None,
+ inverted=False,
+ depth=0,
+ rotate=0,
+ ):
+ if self.with_pdfrw:
+ from pdfrw import PdfDict, PdfName, PdfObject, PdfString
+ from pdfrw.py23_diffs import convert_load
+ else:
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfObject = MyPdfObject
+ PdfString = MyPdfString
+ convert_load = my_convert_load
+
+ if color == Colorspace["1"] or color == Colorspace.L:
+ colorspace = PdfName.DeviceGray
+ elif color == Colorspace.RGB:
+ colorspace = PdfName.DeviceRGB
+ elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
+ colorspace = PdfName.DeviceCMYK
+ elif color == Colorspace.P:
+ if self.with_pdfrw:
+ raise Exception(
+ "pdfrw does not support hex strings for "
+ "palette image input, re-run with "
+ "--without-pdfrw"
+ )
+ colorspace = [
+ PdfName.Indexed,
+ PdfName.DeviceRGB,
+ len(palette) - 1,
+ PdfString.encode(palette, hextype=True),
+ ]
+ else:
+ raise UnsupportedColorspaceError("unsupported color space: %s" % color.name)
+
+ # either embed the whole jpeg or deflate the bitmap representation
+ if imgformat is ImageFormat.JPEG:
+ ofilter = PdfName.DCTDecode
+ elif imgformat is ImageFormat.JPEG2000:
+ ofilter = PdfName.JPXDecode
+ self.writer.version = "1.5" # jpeg2000 needs pdf 1.5
+ elif imgformat is ImageFormat.CCITTGroup4:
+ ofilter = [PdfName.CCITTFaxDecode]
+ else:
+ ofilter = PdfName.FlateDecode
+
+ image = PdfDict(stream=convert_load(imgdata))
+
+ image[PdfName.Type] = PdfName.XObject
+ image[PdfName.Subtype] = PdfName.Image
+ image[PdfName.Filter] = ofilter
+ image[PdfName.Width] = imgwidthpx
+ image[PdfName.Height] = imgheightpx
+ image[PdfName.ColorSpace] = colorspace
+ image[PdfName.BitsPerComponent] = depth
+
+ if color == Colorspace["CMYK;I"]:
+ # Inverts all four channels
+ image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
+
+ if imgformat is ImageFormat.CCITTGroup4:
+ decodeparms = PdfDict()
+ # The default for the K parameter is 0 which indicates Group 3 1-D
+ # encoding. We set it to -1 because we want Group 4 encoding.
+ decodeparms[PdfName.K] = -1
+ if inverted:
+ decodeparms[PdfName.BlackIs1] = PdfObject("false")
+ else:
+ decodeparms[PdfName.BlackIs1] = PdfObject("true")
+ decodeparms[PdfName.Columns] = imgwidthpx
+ decodeparms[PdfName.Rows] = imgheightpx
+ image[PdfName.DecodeParms] = [decodeparms]
+ elif imgformat is ImageFormat.PNG:
+ decodeparms = PdfDict()
+ decodeparms[PdfName.Predictor] = 15
+ if color in [Colorspace.P, Colorspace["1"], Colorspace.L]:
+ decodeparms[PdfName.Colors] = 1
+ else:
+ decodeparms[PdfName.Colors] = 3
+ decodeparms[PdfName.Columns] = imgwidthpx
+ decodeparms[PdfName.BitsPerComponent] = depth
+ image[PdfName.DecodeParms] = decodeparms
+
+ text = (
+ "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ"
+ % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)
+ ).encode("ascii")
+
+ content = PdfDict(stream=convert_load(text))
+ resources = PdfDict(XObject=PdfDict(Im0=image))
+
+ page = PdfDict(indirect=True)
+ page[PdfName.Type] = PdfName.Page
+ page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight]
+ page[PdfName.Resources] = resources
+ page[PdfName.Contents] = content
+ if rotate != 0:
+ page[PdfName.Rotate] = rotate
+ if userunit is not None:
+ # /UserUnit requires PDF 1.6
+ if self.writer.version < "1.6":
+ self.writer.version = "1.6"
+ page[PdfName.UserUnit] = userunit
+
+ self.writer.addpage(page)
+
+ if not self.with_pdfrw:
+ self.writer.addobj(content)
+ self.writer.addobj(image)
+
+ def tostring(self):
+ stream = BytesIO()
+ self.tostream(stream)
+ return stream.getvalue()
+
+ def tostream(self, outputstream):
+ if self.with_pdfrw:
+ from pdfrw import PdfDict, PdfName, PdfArray, PdfObject
+ else:
+ PdfDict = MyPdfDict
+ PdfName = MyPdfName
+ PdfObject = MyPdfObject
+ PdfArray = MyPdfArray
+ NullObject = PdfObject("null")
+ TrueObject = PdfObject("true")
+
+ # We fill the catalog with more information like /ViewerPreferences,
+ # /PageMode, /PageLayout or /OpenAction because the latter refers to a
+ # page object which has to be present so that we can get its id.
+ #
+ # Furthermore, if using pdfrw, the trailer is cleared every time a page
+ # is added, so we can only start using it after all pages have been
+ # written.
+
+ if self.with_pdfrw:
+ catalog = self.writer.trailer.Root
+ else:
+ catalog = self.writer.catalog
+
+ if (
+ self.fullscreen
+ or self.fit_window
+ or self.center_window
+ or self.panes is not None
+ ):
+ catalog[PdfName.ViewerPreferences] = PdfDict()
+
+ if self.fullscreen:
+ # this setting might be overwritten later by the page mode
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseNone
+
+ if self.panes == PageMode.thumbs:
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseThumbs
+ # this setting might be overwritten later if fullscreen
+ catalog[PdfName.PageMode] = PdfName.UseThumbs
+ elif self.panes == PageMode.outlines:
+ catalog[PdfName.ViewerPreferences][
+ PdfName.NonFullScreenPageMode
+ ] = PdfName.UseOutlines
+ # this setting might be overwritten later if fullscreen
+ catalog[PdfName.PageMode] = PdfName.UseOutlines
+ elif self.panes in [PageMode.none, None]:
+ pass
+ else:
+ raise ValueError("unknown page mode: %s" % self.panes)
+
+ if self.fit_window:
+ catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject
+
+ if self.center_window:
+ catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = TrueObject
+
+ if self.fullscreen:
+ catalog[PdfName.PageMode] = PdfName.FullScreen
+
+ # see table 8.2 in section 8.2.1 in
+ # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf
+ # Fit - Fits the page to the window.
+ # FitH - Fits the width of the page to the window.
+ # FitV - Fits the height of the page to the window.
+ # FitR - Fits the rectangle specified by the four coordinates to the
+ # window.
+ # FitB - Fits the page bounding box to the window. This basically
+ # reduces the amount of whitespace (margins) that is displayed
+ # and thus focussing more on the text content.
+ # FitBH - Fits the width of the page bounding box to the window.
+ # FitBV - Fits the height of the page bounding box to the window.
+
+ # by default the initial page is the first one
+ initial_page = self.writer.pagearray[0]
+ # we set the open action here to make sure we open on the requested
+ # initial page but this value might be overwritten by a custom open
+ # action later while still taking the requested initial page into
+ # account
+ if self.initial_page is not None:
+ initial_page = self.writer.pagearray[self.initial_page - 1]
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.XYZ, NullObject, NullObject, 0]
+ )
+
+ if self.magnification == Magnification.fit:
+ catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit])
+ elif self.magnification == Magnification.fith:
+ pagewidth = initial_page[PdfName.MediaBox][2]
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.FitH, pagewidth]
+ )
+ elif self.magnification == Magnification.fitbh:
+ # quick hack to determine the image width on the page
+ imgwidth = float(initial_page[PdfName.Contents].stream.split()[4])
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.FitBH, imgwidth]
+ )
+ elif isinstance(self.magnification, float):
+ catalog[PdfName.OpenAction] = PdfArray(
+ [initial_page, PdfName.XYZ, NullObject, NullObject, self.magnification]
+ )
+ elif self.magnification is None:
+ pass
+ else:
+ raise ValueError("unknown magnification: %s" % self.magnification)
+
+ if self.page_layout == PageLayout.single:
+ catalog[PdfName.PageLayout] = PdfName.SinglePage
+ elif self.page_layout == PageLayout.onecolumn:
+ catalog[PdfName.PageLayout] = PdfName.OneColumn
+ elif self.page_layout == PageLayout.twocolumnright:
+ catalog[PdfName.PageLayout] = PdfName.TwoColumnRight
+ elif self.page_layout == PageLayout.twocolumnleft:
+ catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft
+ elif self.page_layout is None:
+ pass
+ else:
+ raise ValueError("unknown page layout: %s" % self.page_layout)
+
+ # now write out the PDF
+ if self.with_pdfrw:
+ self.writer.trailer.Info = self.info
+ self.writer.write(outputstream)
+ else:
+ self.writer.tostream(self.info, outputstream)
+
+
+def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
+ if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
+ # this codepath gets called if the PIL installation is not able to
+ # handle JPEG2000 files
+ imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
+
+ if hdpi is None:
+ hdpi = default_dpi
+ if vdpi is None:
+ vdpi = default_dpi
+ ndpi = (hdpi, vdpi)
+ else:
+ imgwidthpx, imgheightpx = imgdata.size
+
+ ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi))
+ # In python3, the returned dpi value for some tiff images will
+ # not be an integer but a float. To make the behaviour of
+ # img2pdf the same between python2 and python3, we convert that
+ # float into an integer by rounding.
+ # Search online for the 72.009 dpi problem for more info.
+ ndpi = (int(round(ndpi[0])), int(round(ndpi[1])))
+ ics = imgdata.mode
+
+ if ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info:
+ logging.warning(
+ "Image contains transparency which cannot be retained " "in PDF."
+ )
+ logging.warning("img2pdf will not perform a lossy operation.")
+ logging.warning("You can remove the alpha channel using imagemagick:")
+ logging.warning(
+ " $ convert input.png -background white -alpha "
+ "remove -alpha off output.png"
+ )
+ raise Exception("Refusing to work on images with alpha channel")
+
+ # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0
+ # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG
+ # header. In that case it can happen that the horizontal and vertical DPI
+ # are set to zero.
+ if ndpi == (0, 0):
+ ndpi = (default_dpi, default_dpi)
+
+ # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi.
+ # In that case, we want to use a different default.
+ if ndpi == (1, 1) and imgformat == ImageFormat.TIFF:
+ ndpi = (
+ imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi),
+ imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi),
+ )
+
+ logging.debug("input dpi = %d x %d", *ndpi)
+
+ rotation = 0
+ if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None:
+ for tag, value in imgdata._getexif().items():
+ if TAGS.get(tag, tag) == "Orientation":
+ # Detailed information on EXIF rotation tags:
+ # http://impulseadventure.com/photo/exif-orientation.html
+ if value == 1:
+ rotation = 0
+ elif value == 6:
+ rotation = 90
+ elif value == 3:
+ rotation = 180
+ elif value == 8:
+ rotation = 270
+ elif value in (2, 4, 5, 7):
+ raise Exception(
+ 'Image "%s": Unsupported flipped '
+ "rotation mode (%d)" % (im.name, value)
+ )
+ else:
+ raise Exception(
+ 'Image "%s": invalid rotation (%d)' % (im.name, value)
+ )
+
+ logging.debug("rotation = %d°", rotation)
+
+ if colorspace:
+ color = colorspace
+ logging.debug("input colorspace (forced) = %s", color)
+ else:
+ color = None
+ for c in Colorspace:
+ if c.name == ics:
+ color = c
+ if color is None:
+ # PIL does not provide the information about the original
+ # colorspace for 16bit grayscale PNG images. Thus, we retrieve
+ # that info manually by looking at byte 10 in the IHDR chunk. We
+ # know where to find that in the file because the IHDR chunk must
+ # be the first chunk
+ if (
+ rawdata is not None
+ and imgformat == ImageFormat.PNG
+ and rawdata[25] == 0
+ ):
+ color = Colorspace.L
+ else:
+ raise ValueError("unknown colorspace")
+ if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG:
+ # Adobe inverts CMYK JPEGs for some reason, and others
+ # have followed suit as well. Some software assumes the
+ # JPEG is inverted if the Adobe tag (APP14), while other
+ # software assumes all CMYK JPEGs are inverted. I don't
+ # have enough experience with these to know which is
+ # better for images currently in the wild, so I'm going
+ # with the first approach for now.
+ if "adobe" in imgdata.info:
+ color = Colorspace["CMYK;I"]
+ logging.debug("input colorspace = %s", color.name)
+
+ logging.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
+
+ return (color, ndpi, imgwidthpx, imgheightpx, rotation)
+
+
+def ccitt_payload_location_from_pil(img):
+ # If Pillow is passed an invalid compression argument it will ignore it;
+ # make sure the image actually got compressed.
+ if img.info["compression"] != "group4":
+ raise ValueError(
+ "Image not compressed with CCITT Group 4 but with: %s"
+ % img.info["compression"]
+ )
+
+ # Read the TIFF tags to find the offset(s) of the compressed data strips.
+ strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS]
+ strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
+ rows_per_strip = img.tag_v2.get(TiffImagePlugin.ROWSPERSTRIP, 2 ** 32 - 1)
+
+ # PIL always seems to create a single strip even for very large TIFFs when
+ # it saves images, so assume we only have to read a single strip.
+ # A test ~10 GPixel image was still encoded as a single strip. Just to be
+ # safe check throw an error if there is more than one offset.
+ if len(strip_offsets) != 1 or len(strip_bytes) != 1:
+ raise NotImplementedError("Transcoding multiple strips not supported")
+
+ (offset,), (length,) = strip_offsets, strip_bytes
+
+ logging.debug("TIFF strip_offsets: %d" % offset)
+ logging.debug("TIFF strip_bytes: %d" % length)
+
+ return offset, length
+
+
+def transcode_monochrome(imgdata):
+ """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
+
+ logging.debug("Converting monochrome to CCITT Group4")
+
+ # Convert the image to Group 4 in memory. If libtiff is not installed and
+ # Pillow is not compiled against it, .save() will raise an exception.
+ newimgio = BytesIO()
+
+ # we create a whole new PIL image or otherwise it might happen with some
+ # input images, that libtiff fails an assert and the whole process is
+ # killed by a SIGABRT:
+ # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46
+ im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes())
+ im.save(newimgio, format="TIFF", compression="group4")
+
+ # Open new image in memory
+ newimgio.seek(0)
+ newimg = Image.open(newimgio)
+
+ offset, length = ccitt_payload_location_from_pil(newimg)
+
+ newimgio.seek(offset)
+ return newimgio.read(length)
+
+
+def parse_png(rawdata):
+ pngidat = b""
+ palette = []
+ i = 16
+ while i < len(rawdata):
+ # once we can require Python >= 3.2 we can use int.from_bytes() instead
+ n, = struct.unpack(">I", rawdata[i - 8 : i - 4])
+ if i + n > len(rawdata):
+ raise Exception("invalid png: %d %d %d" % (i, n, len(rawdata)))
+ if rawdata[i - 4 : i] == b"IDAT":
+ pngidat += rawdata[i : i + n]
+ elif rawdata[i - 4 : i] == b"PLTE":
+ # This could be as simple as saying "palette = rawdata[i:i+n]" but
+ # pdfrw does only escape parenthesis and backslashes in the raw
+ # byte stream. But raw carriage return bytes are interpreted as
+ # line feed bytes by ghostscript. So instead we use the hex string
+ # format. pdfrw cannot write it but at least ghostscript is happy
+ # with it. We would also write out the palette in binary format
+ # (and escape more bytes) but since we cannot use pdfrw anyways,
+ # we choose the more human readable variant.
+ # See https://github.com/pmaupin/pdfrw/issues/147
+ for j in range(i, i + n, 3):
+ # with int.from_bytes() we would not have to prepend extra
+ # zeroes
+ color, = struct.unpack(">I", b"\x00" + rawdata[j : j + 3])
+ palette.append(color)
+ i += n
+ i += 12
+ return pngidat, palette
+
+
+def read_images(rawdata, colorspace, first_frame_only=False):
+ im = BytesIO(rawdata)
+ im.seek(0)
+ imgdata = None
+ try:
+ imgdata = Image.open(im)
+ except IOError as e:
+ # test if it is a jpeg2000 image
+ if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ raise ImageOpenError(
+ "cannot read input image (not jpeg2000). "
+ "PIL: error reading image: %s" % e
+ )
+ # image is jpeg2000
+ imgformat = ImageFormat.JPEG2000
+ else:
+ imgformat = None
+ for f in ImageFormat:
+ if f.name == imgdata.format:
+ imgformat = f
+ if imgformat is None:
+ imgformat = ImageFormat.other
+
+ logging.debug("imgformat = %s", imgformat.name)
+
+ # depending on the input format, determine whether to pass the raw
+ # image or the zlib compressed color information
+
+ # JPEG and JPEG2000 can be embedded into the PDF as-is
+ if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000:
+ color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ if color == Colorspace["1"]:
+ raise JpegColorspaceError("jpeg can't be monochrome")
+ if color == Colorspace["P"]:
+ raise JpegColorspaceError("jpeg can't have a color palette")
+ if color == Colorspace["RGBA"]:
+ raise JpegColorspaceError("jpeg can't have an alpha channel")
+ im.close()
+ logging.debug("read_images() embeds a JPEG")
+ return [
+ (
+ color,
+ ndpi,
+ imgformat,
+ rawdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ )
+ ]
+
+ # We can directly embed the IDAT chunk of PNG images if the PNG is not
+ # interlaced
+ #
+ # PIL does not provide the information whether a PNG was stored interlaced
+ # or not. Thus, we retrieve that info manually by looking at byte 13 in the
+ # IHDR chunk. We know where to find that in the file because the IHDR chunk
+ # must be the first chunk.
+ if imgformat == ImageFormat.PNG and rawdata[28] == 0:
+ color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ pngidat, palette = parse_png(rawdata)
+ im.close()
+ # PIL does not provide the information about the original bits per
+ # sample. Thus, we retrieve that info manually by looking at byte 9 in
+ # the IHDR chunk. We know where to find that in the file because the
+ # IHDR chunk must be the first chunk
+ depth = rawdata[24]
+ if depth not in [1, 2, 4, 8, 16]:
+ raise ValueError("invalid bit depth: %d" % depth)
+ logging.debug("read_images() embeds a PNG")
+ return [
+ (
+ color,
+ ndpi,
+ imgformat,
+ pngidat,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ False,
+ depth,
+ rotation,
+ )
+ ]
+
+ # If our input is not JPEG or PNG, then we might have a format that
+ # supports multiple frames (like TIFF or GIF), so we need a loop to
+ # iterate through all frames of the image.
+ #
+ # Each frame gets compressed using PNG compression *except* if:
+ #
+ # * The image is monochrome => encode using CCITT group 4
+ #
+ # * The image is CMYK => zip plain RGB data
+ #
+ # * We are handling a CCITT encoded TIFF frame => embed data
+
+ result = []
+ img_page_count = 0
+ # loop through all frames of the image (example: multipage TIFF)
+ while True:
+ try:
+ imgdata.seek(img_page_count)
+ except EOFError:
+ break
+
+ if first_frame_only and img_page_count > 0:
+ break
+
+ # PIL is unable to preserve the data of 16-bit RGB TIFF files and will
+ # convert it to 8-bit without the possibility to retrieve the original
+ # data
+ # https://github.com/python-pillow/Pillow/issues/1888
+ #
+ # Some tiff images do not have BITSPERSAMPLE set. Use this to create
+ # such a tiff: tiffset -u 258 test.tif
+ if (
+ imgformat == ImageFormat.TIFF
+ and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8
+ ):
+ raise ValueError("PIL is unable to preserve more than 8 bits per sample")
+
+ # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it
+ # only contains a single strip
+ if (
+ imgformat == ImageFormat.TIFF
+ and imgdata.info["compression"] == "group4"
+ and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1
+ ):
+ photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION]
+ inverted = False
+ if photo == 0:
+ inverted = True
+ elif photo != 1:
+ raise ValueError(
+ "unsupported photometric interpretation for "
+ "group4 tiff: %d" % photo
+ )
+ color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata
+ )
+ offset, length = ccitt_payload_location_from_pil(imgdata)
+ im.seek(offset)
+ rawdata = im.read(length)
+ fillorder = imgdata.tag_v2.get(TiffImagePlugin.FILLORDER)
+ if fillorder is None:
+ # no FillOrder: nothing to do
+ pass
+ elif fillorder == 1:
+ # msb-to-lsb: nothing to do
+ pass
+ elif fillorder == 2:
+ logging.debug("fillorder is lsb-to-msb => reverse bits")
+ # lsb-to-msb: reverse bits of each byte
+ rawdata = bytearray(rawdata)
+ for i in range(len(rawdata)):
+ rawdata[i] = TIFFBitRevTable[rawdata[i]]
+ rawdata = bytes(rawdata)
+ else:
+ raise ValueError("unsupported FillOrder: %d" % fillorder)
+ logging.debug("read_images() embeds Group4 from TIFF")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.CCITTGroup4,
+ rawdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ inverted,
+ 1,
+ rotation,
+ )
+ )
+ img_page_count += 1
+ continue
+
+ logging.debug("Converting frame: %d" % img_page_count)
+
+ color, ndpi, imgwidthpx, imgheightpx, rotation = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace
+ )
+
+ newimg = None
+ if color == Colorspace["1"]:
+ try:
+ ccittdata = transcode_monochrome(imgdata)
+ logging.debug("read_images() encoded a B/W image as CCITT group 4")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.CCITTGroup4,
+ ccittdata,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 1,
+ rotation,
+ )
+ )
+ img_page_count += 1
+ continue
+ except Exception as e:
+ logging.debug(e)
+ logging.debug("Converting colorspace 1 to L")
+ newimg = imgdata.convert("L")
+ color = Colorspace.L
+ elif color in [
+ Colorspace.RGB,
+ Colorspace.L,
+ Colorspace.CMYK,
+ Colorspace["CMYK;I"],
+ Colorspace.P,
+ ]:
+ logging.debug("Colorspace is OK: %s", color)
+ newimg = imgdata
+ else:
+ raise ValueError("unknown or unsupported colorspace: %s" % color.name)
+ # the PNG format does not support CMYK, so we fall back to normal
+ # compression
+ if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]:
+ imggz = zlib.compress(newimg.tobytes())
+ logging.debug("read_images() encoded CMYK with flate compression")
+ result.append(
+ (
+ color,
+ ndpi,
+ imgformat,
+ imggz,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ )
+ )
+ else:
+ # cheapo version to retrieve a PNG encoding of the payload is to
+ # just save it with PIL. In the future this could be replaced by
+ # dedicated function applying the Paeth PNG filter to the raw pixel
+ pngbuffer = BytesIO()
+ newimg.save(pngbuffer, format="png")
+ pngidat, palette = parse_png(pngbuffer.getvalue())
+ # PIL does not provide the information about the original bits per
+ # sample. Thus, we retrieve that info manually by looking at byte 9 in
+ # the IHDR chunk. We know where to find that in the file because the
+ # IHDR chunk must be the first chunk
+ pngbuffer.seek(24)
+ depth = ord(pngbuffer.read(1))
+ if depth not in [1, 2, 4, 8, 16]:
+ raise ValueError("invalid bit depth: %d" % depth)
+ logging.debug("read_images() encoded an image as PNG")
+ result.append(
+ (
+ color,
+ ndpi,
+ ImageFormat.PNG,
+ pngidat,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ False,
+ depth,
+ rotation,
+ )
+ )
+ img_page_count += 1
+ # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the
+ # close() method
+ try:
+ imgdata.close()
+ except AttributeError:
+ pass
+ im.close()
+ return result
+
+
+# converts a length in pixels to a length in PDF units (1/72 of an inch)
+def px_to_pt(length, dpi):
+ return 72.0 * length / dpi
+
+
+def cm_to_pt(length):
+ return (72.0 * length) / 2.54
+
+
+def mm_to_pt(length):
+ return (72.0 * length) / 25.4
+
+
+def in_to_pt(length):
+ return 72.0 * length
+
+
+def get_layout_fun(
+ pagesize=None, imgsize=None, border=None, fit=None, auto_orient=False
+):
+ def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight):
+ if fitwidth is None and fitheight is None:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ # if fit is fill or enlarge then it is okay if one of the dimensions
+ # are negative but one of them must still be positive
+ # if fit is not fill or enlarge then both dimensions must be positive
+ if (
+ fit in [FitMode.fill, FitMode.enlarge]
+ and fitwidth is not None
+ and fitwidth < 0
+ and fitheight is not None
+ and fitheight < 0
+ ):
+ raise ValueError(
+ "cannot fit into a rectangle where both " "dimensions are negative"
+ )
+ elif fit not in [FitMode.fill, FitMode.enlarge] and (
+ (fitwidth is not None and fitwidth < 0)
+ or (fitheight is not None and fitheight < 0)
+ ):
+ raise Exception(
+ "cannot fit into a rectangle where either " "dimensions are negative"
+ )
+
+ def default():
+ if fitwidth is not None and fitheight is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ if newimgheight > fitheight:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+
+ if fit is None or fit == FitMode.into:
+ return default()
+ elif fit == FitMode.fill:
+ if fitwidth is not None and fitheight is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ if newimgheight < fitheight:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+ elif fit == FitMode.exact:
+ if fitwidth is not None and fitheight is not None:
+ return fitwidth, fitheight
+ elif fitwidth is None and fitheight is not None:
+ newimgheight = fitheight
+ newimgwidth = (newimgheight * imgwidth) / imgheight
+ elif fitheight is None and fitwidth is not None:
+ newimgwidth = fitwidth
+ newimgheight = (newimgwidth * imgheight) / imgwidth
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return newimgwidth, newimgheight
+ elif fit == FitMode.shrink:
+ if fitwidth is not None and fitheight is not None:
+ if imgwidth <= fitwidth and imgheight <= fitheight:
+ return imgwidth, imgheight
+ elif fitwidth is None and fitheight is not None:
+ if imgheight <= fitheight:
+ return imgwidth, imgheight
+ elif fitheight is None and fitwidth is not None:
+ if imgwidth <= fitwidth:
+ return imgwidth, imgheight
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return default()
+ elif fit == FitMode.enlarge:
+ if fitwidth is not None and fitheight is not None:
+ if imgwidth > fitwidth or imgheight > fitheight:
+ return imgwidth, imgheight
+ elif fitwidth is None and fitheight is not None:
+ if imgheight > fitheight:
+ return imgwidth, imgheight
+ elif fitheight is None and fitwidth is not None:
+ if imgwidth > fitwidth:
+ return imgwidth, imgheight
+ else:
+ raise ValueError("fitwidth and fitheight cannot both be None")
+ return default()
+ else:
+ raise NotImplementedError
+
+ # if no layout arguments are given, then the image size is equal to the
+ # page size and will be drawn with the default dpi
+ if pagesize is None and imgsize is None and border is None:
+ return default_layout_fun
+ if pagesize is None and imgsize is None and border is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0])
+ imgheightpdf = px_to_pt(imgheightpx, ndpi[1])
+ pagewidth = imgwidthpdf + 2 * border[1]
+ pageheight = imgheightpdf + 2 * border[0]
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ if border is None:
+ border = (0, 0)
+ # if the pagesize is given but the imagesize is not, then the imagesize
+ # will be calculated from the pagesize, taking into account the border
+ # and the fitting
+ if pagesize is not None and imgsize is None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ if (
+ pagesize[0] is not None
+ and pagesize[1] is not None
+ and auto_orient
+ and (
+ (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1])
+ or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])
+ )
+ ):
+ pagewidth, pageheight = pagesize[1], pagesize[0]
+ newborder = border[1], border[0]
+ else:
+ pagewidth, pageheight = pagesize[0], pagesize[1]
+ newborder = border
+ if pagewidth is not None:
+ fitwidth = pagewidth - 2 * newborder[1]
+ else:
+ fitwidth = None
+ if pageheight is not None:
+ fitheight = pageheight - 2 * newborder[0]
+ else:
+ fitheight = None
+ if (
+ fit in [FitMode.fill, FitMode.enlarge]
+ and fitwidth is not None
+ and fitwidth < 0
+ and fitheight is not None
+ and fitheight < 0
+ ):
+ raise NegativeDimensionError(
+ "at least one border dimension musts be smaller than half "
+ "the respective page dimension"
+ )
+ elif fit not in [FitMode.fill, FitMode.enlarge] and (
+ (fitwidth is not None and fitwidth < 0)
+ or (fitheight is not None and fitheight < 0)
+ ):
+ raise NegativeDimensionError(
+ "one border dimension is larger than half of the "
+ "respective page dimension"
+ )
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ fitwidth,
+ fitheight,
+ )
+ if pagewidth is None:
+ pagewidth = imgwidthpdf + border[1] * 2
+ if pageheight is None:
+ pageheight = imgheightpdf + border[0] * 2
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+
+ def scale_imgsize(s, px, dpi):
+ if s is None:
+ return None
+ mode, value = s
+ if mode == ImgSize.abs:
+ return value
+ if mode == ImgSize.perc:
+ return (px_to_pt(px, dpi) * value) / 100
+ if mode == ImgSize.dpi:
+ return px_to_pt(px, value)
+ raise NotImplementedError
+
+ if pagesize is None and imgsize is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
+ scale_imgsize(imgsize[1], imgheightpx, ndpi[1]),
+ )
+ pagewidth = imgwidthpdf + 2 * border[1]
+ pageheight = imgheightpdf + 2 * border[0]
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ if pagesize is not None and imgsize is not None:
+
+ def layout_fun(imgwidthpx, imgheightpx, ndpi):
+ if (
+ pagesize[0] is not None
+ and pagesize[1] is not None
+ and auto_orient
+ and (
+ (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1])
+ or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1])
+ )
+ ):
+ pagewidth, pageheight = pagesize[1], pagesize[0]
+ else:
+ pagewidth, pageheight = pagesize[0], pagesize[1]
+ imgwidthpdf, imgheightpdf = fitfun(
+ fit,
+ px_to_pt(imgwidthpx, ndpi[0]),
+ px_to_pt(imgheightpx, ndpi[1]),
+ scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]),
+ scale_imgsize(imgsize[1], imgheightpx, ndpi[1]),
+ )
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+ return layout_fun
+ raise NotImplementedError
+
+
+def default_layout_fun(imgwidthpx, imgheightpx, ndpi):
+ imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0])
+ imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1])
+ return pagewidth, pageheight, imgwidthpdf, imgheightpdf
+
+
+def get_fixed_dpi_layout_fun(fixed_dpi):
+ """Layout function that overrides whatever DPI is claimed in input images.
+
+ >>> layout_fun = get_fixed_dpi_layout_fun((300, 300))
+ >>> convert(image1, layout_fun=layout_fun, ... outputstream=...)
+ """
+
+ def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi):
+ return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi)
+
+ return fixed_dpi_layout_fun
+
+
+def find_scale(pagewidth, pageheight):
+ """Find the power of 10 (10, 100, 1000...) that will reduce the scale
+ below the PDF specification limit of 14400 PDF units (=200 inches)"""
+ from math import log10, ceil
+
+ major = max(pagewidth, pageheight)
+ oversized = major / 14400.0
+
+ return 10 ** ceil(log10(oversized))
+
+
+# given one or more input image, depending on outputstream, either return a
+# string containing the whole PDF if outputstream is None or write the PDF
+# data to the given file-like object and return None
+#
+# Input images can be given as file like objects (they must implement read()),
+# as a binary string representing the image content or as filenames to the
+# images.
+def convert(*images, **kwargs):
+
+ _default_kwargs = dict(
+ title=None,
+ author=None,
+ creator=None,
+ producer=None,
+ creationdate=None,
+ moddate=None,
+ subject=None,
+ keywords=None,
+ colorspace=None,
+ nodate=False,
+ layout_fun=default_layout_fun,
+ viewer_panes=None,
+ viewer_initial_page=None,
+ viewer_magnification=None,
+ viewer_page_layout=None,
+ viewer_fit_window=False,
+ viewer_center_window=False,
+ viewer_fullscreen=False,
+ with_pdfrw=True,
+ outputstream=None,
+ first_frame_only=False,
+ allow_oversized=True,
+ )
+ for kwname, default in _default_kwargs.items():
+ if kwname not in kwargs:
+ kwargs[kwname] = default
+
+ pdf = pdfdoc(
+ "1.3",
+ kwargs["title"],
+ kwargs["author"],
+ kwargs["creator"],
+ kwargs["producer"],
+ kwargs["creationdate"],
+ kwargs["moddate"],
+ kwargs["subject"],
+ kwargs["keywords"],
+ kwargs["nodate"],
+ kwargs["viewer_panes"],
+ kwargs["viewer_initial_page"],
+ kwargs["viewer_magnification"],
+ kwargs["viewer_page_layout"],
+ kwargs["viewer_fit_window"],
+ kwargs["viewer_center_window"],
+ kwargs["viewer_fullscreen"],
+ kwargs["with_pdfrw"],
+ )
+
+ # backwards compatibility with older img2pdf versions where the first
+ # argument to the function had to be given as a list
+ if len(images) == 1:
+ # if only one argument was given and it is a list, expand it
+ if isinstance(images[0], (list, tuple)):
+ images = images[0]
+
+ if not isinstance(images, (list, tuple)):
+ images = [images]
+
+ for img in images:
+ # img is allowed to be a path, a binary string representing image data
+ # or a file-like object (really anything that implements read())
+ try:
+ rawdata = img.read()
+ except AttributeError:
+ if not isinstance(img, (str, bytes)):
+ raise TypeError("Neither implements read() nor is str or bytes")
+ # the thing doesn't have a read() function, so try if we can treat
+ # it as a file name
+ try:
+ with open(img, "rb") as f:
+ rawdata = f.read()
+ except Exception:
+ # whatever the exception is (string could contain NUL
+ # characters or the path could just not exist) it's not a file
+ # name so we now try treating it as raw image content
+ rawdata = img
+
+ for (
+ color,
+ ndpi,
+ imgformat,
+ imgdata,
+ imgwidthpx,
+ imgheightpx,
+ palette,
+ inverted,
+ depth,
+ rotation,
+ ) in read_images(rawdata, kwargs["colorspace"], kwargs["first_frame_only"]):
+ pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
+ imgwidthpx, imgheightpx, ndpi
+ )
+
+ userunit = None
+ if pagewidth < 3.00 or pageheight < 3.00:
+ logging.warning(
+ "pdf width or height is below 3.00 - too " "small for some viewers!"
+ )
+ elif pagewidth > 14400.0 or pageheight > 14400.0:
+ if kwargs["allow_oversized"]:
+ userunit = find_scale(pagewidth, pageheight)
+ pagewidth /= userunit
+ pageheight /= userunit
+ imgwidthpdf /= userunit
+ imgheightpdf /= userunit
+ else:
+ raise PdfTooLargeError(
+ "pdf width or height must not exceed 200 inches."
+ )
+ # the image is always centered on the page
+ imgxpdf = (pagewidth - imgwidthpdf) / 2.0
+ imgypdf = (pageheight - imgheightpdf) / 2.0
+ pdf.add_imagepage(
+ color,
+ imgwidthpx,
+ imgheightpx,
+ imgformat,
+ imgdata,
+ imgwidthpdf,
+ imgheightpdf,
+ imgxpdf,
+ imgypdf,
+ pagewidth,
+ pageheight,
+ userunit,
+ palette,
+ inverted,
+ depth,
+ rotation,
+ )
+
+ if kwargs["outputstream"]:
+ pdf.tostream(kwargs["outputstream"])
+ return
+
+ return pdf.tostring()
+
+
+def parse_num(num, name):
+ if num == "":
+ return None
+ unit = None
+ if num.endswith("pt"):
+ unit = Unit.pt
+ elif num.endswith("cm"):
+ unit = Unit.cm
+ elif num.endswith("mm"):
+ unit = Unit.mm
+ elif num.endswith("in"):
+ unit = Unit.inch
+ else:
+ try:
+ num = float(num)
+ except ValueError:
+ msg = (
+ "%s is not a floating point number and doesn't have a "
+ "valid unit: %s" % (name, num)
+ )
+ raise argparse.ArgumentTypeError(msg)
+ if unit is None:
+ unit = Unit.pt
+ else:
+ num = num[:-2]
+ try:
+ num = float(num)
+ except ValueError:
+ msg = "%s is not a floating point number: %s" % (name, num)
+ raise argparse.ArgumentTypeError(msg)
+ if unit == Unit.cm:
+ num = cm_to_pt(num)
+ elif unit == Unit.mm:
+ num = mm_to_pt(num)
+ elif unit == Unit.inch:
+ num = in_to_pt(num)
+ return num
+
+
+def parse_imgsize_num(num, name):
+ if num == "":
+ return None
+ unit = None
+ if num.endswith("pt"):
+ unit = ImgUnit.pt
+ elif num.endswith("cm"):
+ unit = ImgUnit.cm
+ elif num.endswith("mm"):
+ unit = ImgUnit.mm
+ elif num.endswith("in"):
+ unit = ImgUnit.inch
+ elif num.endswith("dpi"):
+ unit = ImgUnit.dpi
+ elif num.endswith("%"):
+ unit = ImgUnit.perc
+ else:
+ try:
+ num = float(num)
+ except ValueError:
+ msg = (
+ "%s is not a floating point number and doesn't have a "
+ "valid unit: %s" % (name, num)
+ )
+ raise argparse.ArgumentTypeError(msg)
+ if unit is None:
+ unit = ImgUnit.pt
+ else:
+ # strip off unit from string
+ if unit == ImgUnit.dpi:
+ num = num[:-3]
+ elif unit == ImgUnit.perc:
+ num = num[:-1]
+ else:
+ num = num[:-2]
+ try:
+ num = float(num)
+ except ValueError:
+ msg = "%s is not a floating point number: %s" % (name, num)
+ raise argparse.ArgumentTypeError(msg)
+ if unit == ImgUnit.cm:
+ num = (ImgSize.abs, cm_to_pt(num))
+ elif unit == ImgUnit.mm:
+ num = (ImgSize.abs, mm_to_pt(num))
+ elif unit == ImgUnit.inch:
+ num = (ImgSize.abs, in_to_pt(num))
+ elif unit == ImgUnit.pt:
+ num = (ImgSize.abs, num)
+ elif unit == ImgUnit.dpi:
+ num = (ImgSize.dpi, num)
+ elif unit == ImgUnit.perc:
+ num = (ImgSize.perc, num)
+ return num
+
+
+def parse_pagesize_rectarg(string):
+ transposed = string.endswith("^T")
+ if transposed:
+ string = string[:-2]
+ if papersizes.get(string.lower()):
+ string = papersizes[string.lower()]
+ if "x" not in string:
+ # if there is no separating "x" in the string, then the string is
+ # interpreted as the width
+ w = parse_num(string, "width")
+ h = None
+ else:
+ w, h = string.split("x", 1)
+ w = parse_num(w, "width")
+ h = parse_num(h, "height")
+ if transposed:
+ w, h = h, w
+ if w is None and h is None:
+ raise argparse.ArgumentTypeError("at least one dimension must be " "specified")
+ return w, h
+
+
+def parse_imgsize_rectarg(string):
+ transposed = string.endswith("^T")
+ if transposed:
+ string = string[:-2]
+ if papersizes.get(string.lower()):
+ string = papersizes[string.lower()]
+ if "x" not in string:
+ # if there is no separating "x" in the string, then the string is
+ # interpreted as the width
+ w = parse_imgsize_num(string, "width")
+ h = None
+ else:
+ w, h = string.split("x", 1)
+ w = parse_imgsize_num(w, "width")
+ h = parse_imgsize_num(h, "height")
+ if transposed:
+ w, h = h, w
+ if w is None and h is None:
+ raise argparse.ArgumentTypeError("at least one dimension must be " "specified")
+ return w, h
+
+
+def parse_colorspacearg(string):
+ for c in Colorspace:
+ if c.name == string:
+ return c
+ allowed = ", ".join([c.name for c in Colorspace])
+ raise argparse.ArgumentTypeError(
+ "Unsupported colorspace: %s. Must be one " "of: %s." % (string, allowed)
+ )
+
+
+def parse_borderarg(string):
+ if ":" in string:
+ h, v = string.split(":", 1)
+ if h == "":
+ raise argparse.ArgumentTypeError("missing value before colon")
+ if v == "":
+ raise argparse.ArgumentTypeError("missing value after colon")
+ else:
+ if string == "":
+ raise argparse.ArgumentTypeError("border option cannot be empty")
+ h, v = string, string
+ h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border")
+ if h is None and v is None:
+ raise argparse.ArgumentTypeError("missing value")
+ return h, v
+
+
+def input_images(path):
+ if path == "-":
+ # we slurp in all data from stdin because we need to seek in it later
+ if PY3:
+ result = sys.stdin.buffer.read()
+ else:
+ result = sys.stdin.read()
+ if len(result) == 0:
+ raise argparse.ArgumentTypeError('"%s" is empty' % path)
+ else:
+ if PY3:
+ try:
+ if os.path.getsize(path) == 0:
+ raise argparse.ArgumentTypeError('"%s" is empty' % path)
+ # test-read a byte from it so that we can abort early in case
+ # we cannot read data from the file
+ with open(path, "rb") as im:
+ im.read(1)
+ except IsADirectoryError:
+ raise argparse.ArgumentTypeError('"%s" is a directory' % path)
+ except PermissionError:
+ raise argparse.ArgumentTypeError('"%s" permission denied' % path)
+ except FileNotFoundError:
+ raise argparse.ArgumentTypeError('"%s" does not exist' % path)
+ else:
+ try:
+ if os.path.getsize(path) == 0:
+ raise argparse.ArgumentTypeError('"%s" is empty' % path)
+ # test-read a byte from it so that we can abort early in case
+ # we cannot read data from the file
+ with open(path, "rb") as im:
+ im.read(1)
+ except IOError as err:
+ raise argparse.ArgumentTypeError(str(err))
+ except OSError as err:
+ raise argparse.ArgumentTypeError(str(err))
+ result = path
+ return result
+
+
+def parse_fitarg(string):
+ for m in FitMode:
+ if m.name == string.lower():
+ return m
+ raise argparse.ArgumentTypeError("unknown fit mode: %s" % string)
+
+
+def parse_panes(string):
+ for m in PageMode:
+ if m.name == string.lower():
+ return m
+ allowed = ", ".join([m.name for m in PageMode])
+ raise argparse.ArgumentTypeError(
+ "Unsupported page mode: %s. Must be one " "of: %s." % (string, allowed)
+ )
+
+
+def parse_magnification(string):
+ for m in Magnification:
+ if m.name == string.lower():
+ return m
+ try:
+ return float(string)
+ except ValueError:
+ pass
+ allowed = ", ".join([m.name for m in Magnification])
+ raise argparse.ArgumentTypeError(
+ "Unsupported magnification: %s. Must be "
+ "a floating point number or one of: %s." % (string, allowed)
+ )
+
+
+def parse_layout(string):
+ for l in PageLayout:
+ if l.name == string.lower():
+ return l
+ allowed = ", ".join([l.name for l in PageLayout])
+ raise argparse.ArgumentTypeError(
+ "Unsupported page layout: %s. Must be " "one of: %s." % (string, allowed)
+ )
+
+
+def valid_date(string):
+ # first try parsing in ISO8601 format
+ try:
+ return datetime.strptime(string, "%Y-%m-%d")
+ except ValueError:
+ pass
+ try:
+ return datetime.strptime(string, "%Y-%m-%dT%H:%M")
+ except ValueError:
+ pass
+ try:
+ return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S")
+ except ValueError:
+ pass
+ # then try dateutil
+ try:
+ from dateutil import parser
+ except ImportError:
+ pass
+ else:
+ try:
+ return parser.parse(string)
+ except TypeError:
+ pass
+ # as a last resort, try the local date utility
+ try:
+ import subprocess
+ except ImportError:
+ pass
+ else:
+ try:
+ utime = subprocess.check_output(["date", "--date", string, "+%s"])
+ except subprocess.CalledProcessError:
+ pass
+ else:
+ return datetime.utcfromtimestamp(int(utime))
+ raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
+
+
+def main(argv=sys.argv):
+ rendered_papersizes = ""
+ for k, v in sorted(papersizes.items()):
+ rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
+
+ parser = argparse.ArgumentParser(
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description="""\
+Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and
+JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000
+images with the only added file size coming from the PDF container itself.
+Other raster graphics formats are losslessly stored using the same encoding
+that PNG uses. Since PDF does not support images with transparency and since
+img2pdf aims to never be lossy, input images with an alpha channel are not
+supported.
+
+The output is sent to standard output so that it can be redirected into a file
+or to another program as part of a shell pipe. To directly write the output
+into a file, use the -o or --output option.
+
+Options:
+""",
+ epilog="""\
+Colorspace:
+ Currently, the colorspace must be forced for JPEG 2000 images that are not in
+ the RGB colorspace. Available colorspace options are based on Python Imaging
+ Library (PIL) short handles.
+
+ RGB RGB color
+ L Grayscale
+ 1 Black and white (internally converted to grayscale)
+ CMYK CMYK color
+ CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe)
+
+Paper sizes:
+ You can specify the short hand paper size names shown in the first column in
+ the table below as arguments to the --pagesize and --imgsize options. The
+ width and height they are mapping to is shown in the second column. Giving
+ the value in the second column has the same effect as giving the short hand
+ in the first column. Appending ^T (a caret/circumflex followed by the letter
+ T) turns the paper size from portrait into landscape. The postfix thus
+ symbolizes the transpose. The values are case insensitive.
+
+%s
+
+Fit options:
+ The img2pdf options for the --fit argument are shown in the first column in
+ the table below. The function of these options can be mapped to the geometry
+ operators of imagemagick. For users who are familiar with imagemagick, the
+ corresponding operator is shown in the second column. The third column shows
+ whether or not the aspect ratio is preserved for that option (same as in
+ imagemagick). Just like imagemagick, img2pdf tries hard to preserve the
+ aspect ratio, so if the --fit argument is not given, then the default is
+ "into" which corresponds to the absence of any operator in imagemagick.
+ The value of the --fit option is case insensitive.
+
+ into | | Y | The default. Width and height values specify maximum
+ | | | values.
+ ---------+---+---+----------------------------------------------------------
+ fill | ^ | Y | Width and height values specify the minimum values.
+ ---------+---+---+----------------------------------------------------------
+ exact | ! | N | Width and height emphatically given.
+ ---------+---+---+----------------------------------------------------------
+ shrink | > | Y | Shrinks an image with dimensions larger than the given
+ | | | ones (and otherwise behaves like "into").
+ ---------+---+---+----------------------------------------------------------
+ enlarge | < | Y | Enlarges an image with dimensions smaller than the given
+ | | | ones (and otherwise behaves like "into").
+
+Argument parsing:
+ Argument long options can be abbreviated to a prefix if the abbreviation is
+ unambiguous. That is, the prefix must match a unique option.
+
+ Beware of your shell interpreting argument values as special characters (like
+ the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
+ values in single quotes.
+
+ If you want an argument value to start with one or more minus characters, you
+ must use the long option name and join them with an equal sign like so:
+
+ $ img2pdf --author=--test--
+
+ If your input file name starts with one or more minus characters, either
+ separate the input files from the other arguments by two minus signs:
+
+ $ img2pdf -- --my-file-starts-with-two-minuses.jpg
+
+ Or be more explicit about its relative path by prepending a ./:
+
+ $ img2pdf ./--my-file-starts-with-two-minuses.jpg
+
+ The order of non-positional arguments (all arguments other than the input
+ images) does not matter.
+
+Examples:
+ Lines starting with a dollar sign denote commands you can enter into your
+ terminal. The dollar sign signifies your command prompt. It is not part of
+ the command you type.
+
+ Convert two scans in JPEG format to a PDF document.
+
+ $ img2pdf --output out.pdf page1.jpg page2.jpg
+
+ Convert a directory of JPEG images into a PDF with printable A4 pages in
+ landscape mode. On each page, the photo takes the maximum amount of space
+ while preserving its aspect ratio and a print border of 2 cm on the top and
+ bottom and 2.5 cm on the left and right hand side.
+
+ $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
+
+ On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
+ original image size if the image is smaller than that.
+
+ $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg
+
+ Prepare a directory of photos to be printed borderless on photo paper with a
+ 3:2 aspect ratio and rotate each page so that its orientation is the same as
+ the input image.
+
+ $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg
+
+ Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf
+ cannot read it from the JPEG2000 file automatically.
+
+ $ img2pdf --output out.pdf --colorspace L input.jp2
+
+Written by Johannes 'josch' Schauer <josch@mister-muffin.de>
+
+Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
+"""
+ % rendered_papersizes,
+ )
+
+ parser.add_argument(
+ "images",
+ metavar="infile",
+ type=input_images,
+ nargs="*",
+ help="Specifies the input file(s) in any format that can be read by "
+ "the Python Imaging Library (PIL). If no input images are given, then "
+ 'a single image is read from standard input. The special filename "-" '
+ "can be used once to read an image from standard input. To read a "
+ 'file in the current directory with the filename "-", pass it to '
+ 'img2pdf by explicitly stating its relative path like "./-".',
+ )
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ help="Makes the program operate in verbose mode, printing messages on "
+ "standard error.",
+ )
+ parser.add_argument(
+ "-V",
+ "--version",
+ action="version",
+ version="%(prog)s " + __version__,
+ help="Prints version information and exits.",
+ )
+
+ outargs = parser.add_argument_group(
+ title="General output arguments",
+ description="Arguments controlling the output format.",
+ )
+
+ # In Python3 we have to output to sys.stdout.buffer because we write are
+ # bytes and not strings. In certain situations, like when the main
+ # function is wrapped by contextlib.redirect_stdout(), sys.stdout does not
+ # have the buffer attribute. Thus we write to sys.stdout by default and
+ # to sys.stdout.buffer if it exists.
+ outargs.add_argument(
+ "-o",
+ "--output",
+ metavar="out",
+ type=argparse.FileType("wb"),
+ default=sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout,
+ help="Makes the program output to a file instead of standard output.",
+ )
+ outargs.add_argument(
+ "-C",
+ "--colorspace",
+ metavar="colorspace",
+ type=parse_colorspacearg,
+ help="""
+Forces the PIL colorspace. See the epilogue for a list of possible values.
+Usually the PDF colorspace would be derived from the color space of the input
+image. This option overwrites the automatically detected colorspace from the
+input image and thus forces a certain colorspace in the output PDF /ColorSpace
+property. This is useful for JPEG 2000 images with a different colorspace than
+RGB.""",
+ )
+
+ outargs.add_argument(
+ "-D",
+ "--nodate",
+ action="store_true",
+ help="Suppresses timestamps in the output and thus makes the output "
+ "deterministic between individual runs. You can also manually "
+ "set a date using the --moddate and --creationdate options.",
+ )
+
+ outargs.add_argument(
+ "--without-pdfrw",
+ action="store_true",
+ help="By default, img2pdf uses the pdfrw library to create the output "
+ "PDF if pdfrw is available. If you want to use the internal PDF "
+ "generator of img2pdf even if pdfrw is present, then pass this "
+ "option. This can be useful if you want to have unicode metadata "
+ "values which pdfrw does not yet support (See "
+ "https://github.com/pmaupin/pdfrw/issues/39) or if you want the "
+ "PDF code to be more human readable.",
+ )
+
+ outargs.add_argument(
+ "--first-frame-only",
+ action="store_true",
+ help="By default, img2pdf will convert multi-frame images like "
+ "multi-page TIFF or animated GIF images to one page per frame. "
+ "This option will only let the first frame of every multi-frame "
+ "input image be converted into a page in the resulting PDF.",
+ )
+
+ outargs.add_argument(
+ "--pillow-limit-break",
+ action="store_true",
+ help="img2pdf uses the Python Imaging Library Pillow to read input "
+ "images. Pillow limits the maximum input image size to %d pixels "
+ "to prevent decompression bomb denial of service attacks. If "
+ "your input image contains more pixels than that, use this "
+ "option to disable this safety measure during this run of img2pdf"
+ % Image.MAX_IMAGE_PIXELS,
+ )
+
+ sizeargs = parser.add_argument_group(
+ title="Image and page size and layout arguments",
+ description="""\
+Every input image will be placed on its own page. The image size is controlled
+by the dpi value of the input image or, if unset or missing, the default dpi of
+%.2f. By default, each page will have the same size as the image it shows.
+Thus, there will be no visible border between the image and the page border by
+default. If image size and page size are made different from each other by the
+options in this section, the image will always be centered in both dimensions.
+
+The image size and page size can be explicitly set using the --imgsize and
+--pagesize options, respectively. If either dimension of the image size is
+specified but the same dimension of the page size is not, then the latter will
+be derived from the former using an optional minimal distance between the image
+and the page border (given by the --border option) and/or a certain fitting
+strategy (given by the --fit option). The converse happens if a dimension of
+the page size is set but the same dimension of the image size is not.
+
+Any length value in below options is represented by the meta variable L which
+is a floating point value with an optional unit appended (without a space
+between them). The default unit is pt (1/72 inch, the PDF unit) and other
+allowed units are cm (centimeter), mm (millimeter), and in (inch).
+
+Any size argument of the format LxL in the options below specifies the width
+and height of a rectangle where the first L represents the width and the second
+L represents the height with an optional unit following each value as described
+above. Either width or height may be omitted. If the height is omitted, the
+separating x can be omitted as well. Omitting the width requires to prefix the
+height with the separating x. The missing dimension will be chosen so to not
+change the image aspect ratio. Instead of giving the width and height
+explicitly, you may also specify some (case-insensitive) common page sizes such
+as letter and A4. See the epilogue at the bottom for a complete list of the
+valid sizes.
+
+The --fit option scales to fit the image into a rectangle that is either
+derived from the --imgsize option or otherwise from the --pagesize option.
+If the --border option is given in addition to the --imgsize option while the
+--pagesize option is not given, then the page size will be calculated from the
+image size, respecting the border setting. If the --border option is given in
+addition to the --pagesize option while the --imgsize option is not given, then
+the image size will be calculated from the page size, respecting the border
+setting. If the --border option is given while both the --pagesize and
+--imgsize options are passed, then the --border option will be ignored.
+
+"""
+ % default_dpi,
+ )
+
+ sizeargs.add_argument(
+ "-S",
+ "--pagesize",
+ metavar="LxL",
+ type=parse_pagesize_rectarg,
+ help="""
+Sets the size of the PDF pages. The short-option is the upper case S because
+it is an mnemonic for being bigger than the image size.""",
+ )
+
+ sizeargs.add_argument(
+ "-s",
+ "--imgsize",
+ metavar="LxL",
+ type=parse_imgsize_rectarg,
+ help="""
+Sets the size of the images on the PDF pages. In addition, the unit dpi is
+allowed which will set the image size as a value of dots per inch. Instead of
+a unit, width and height values may also have a percentage sign appended,
+indicating a resize of the image by that percentage. The short-option is the
+lower case s because it is an mnemonic for being smaller than the page size.
+""",
+ )
+ sizeargs.add_argument(
+ "-b",
+ "--border",
+ metavar="L[:L]",
+ type=parse_borderarg,
+ help="""
+Specifies the minimal distance between the image border and the PDF page
+border. This value Is overwritten by explicit values set by --pagesize or
+--imgsize. The value will be used when calculating page dimensions from the
+image dimensions or the other way round. One, or two length values can be given
+as an argument, separated by a colon. One value specifies the minimal border on
+all four sides. Two values specify the minimal border on the top/bottom and
+left/right, respectively. It is not possible to specify asymmetric borders
+because images will always be centered on the page.
+""",
+ )
+ sizeargs.add_argument(
+ "-f",
+ "--fit",
+ metavar="FIT",
+ type=parse_fitarg,
+ default=FitMode.into,
+ help="""
+
+If --imgsize is given, fits the image using these dimensions. Otherwise, fit
+the image into the dimensions given by --pagesize. FIT is one of into, fill,
+exact, shrink and enlarge. The default value is "into". See the epilogue at the
+bottom for a description of the FIT options.
+
+""",
+ )
+ sizeargs.add_argument(
+ "-a",
+ "--auto-orient",
+ action="store_true",
+ help="""
+If both dimensions of the page are given via --pagesize, conditionally swaps
+these dimensions such that the page orientation is the same as the orientation
+of the input image. If the orientation of a page gets flipped, then so do the
+values set via the --border option.
+""",
+ )
+
+ metaargs = parser.add_argument_group(
+ title="Arguments setting metadata",
+ description="Options handling embedded timestamps, title and author "
+ "information.",
+ )
+ metaargs.add_argument(
+ "--title", metavar="title", type=str, help="Sets the title metadata value"
+ )
+ metaargs.add_argument(
+ "--author", metavar="author", type=str, help="Sets the author metadata value"
+ )
+ metaargs.add_argument(
+ "--creator", metavar="creator", type=str, help="Sets the creator metadata value"
+ )
+ metaargs.add_argument(
+ "--producer",
+ metavar="producer",
+ type=str,
+ default="img2pdf " + __version__,
+ help="Sets the producer metadata value "
+ "(default is: img2pdf " + __version__ + ")",
+ )
+ metaargs.add_argument(
+ "--creationdate",
+ metavar="creationdate",
+ type=valid_date,
+ help="Sets the UTC creation date metadata value in YYYY-MM-DD or "
+ "YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format "
+ "understood by python dateutil module or any format understood "
+ "by `date --date`",
+ )
+ metaargs.add_argument(
+ "--moddate",
+ metavar="moddate",
+ type=valid_date,
+ help="Sets the UTC modification date metadata value in YYYY-MM-DD "
+ "or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format "
+ "understood by python dateutil module or any format understood "
+ "by `date --date`",
+ )
+ metaargs.add_argument(
+ "--subject", metavar="subject", type=str, help="Sets the subject metadata value"
+ )
+ metaargs.add_argument(
+ "--keywords",
+ metavar="kw",
+ type=str,
+ nargs="+",
+ help="Sets the keywords metadata value (can be given multiple times)",
+ )
+
+ viewerargs = parser.add_argument_group(
+ title="PDF viewer arguments",
+ description="PDF files can specify how they are meant to be "
+ "presented to the user by a PDF viewer",
+ )
+
+ viewerargs.add_argument(
+ "--viewer-panes",
+ metavar="PANES",
+ type=parse_panes,
+ help="Instruct the PDF viewer which side panes to show. Valid values "
+ 'are "outlines" and "thumbs". It is not possible to specify both '
+ "at the same time.",
+ )
+ viewerargs.add_argument(
+ "--viewer-initial-page",
+ metavar="NUM",
+ type=int,
+ help="Instead of showing the first page, instruct the PDF viewer to "
+ "show the given page instead. Page numbers start with 1.",
+ )
+ viewerargs.add_argument(
+ "--viewer-magnification",
+ metavar="MAG",
+ type=parse_magnification,
+ help="Instruct the PDF viewer to open the PDF with a certain zoom "
+ "level. Valid values are either a floating point number giving "
+ 'the exact zoom level, "fit" (zoom to fit whole page), "fith" '
+ '(zoom to fit page width) and "fitbh" (zoom to fit visible page '
+ "width).",
+ )
+ viewerargs.add_argument(
+ "--viewer-page-layout",
+ metavar="LAYOUT",
+ type=parse_layout,
+ help="Instruct the PDF viewer how to arrange the pages on the screen. "
+ 'Valid values are "single" (display single pages), "onecolumn" '
+ '(one continuous column), "twocolumnright" (two continuous '
+ 'columns with odd number pages on the right) and "twocolumnleft" '
+ "(two continuous columns with odd numbered pages on the left)",
+ )
+ viewerargs.add_argument(
+ "--viewer-fit-window",
+ action="store_true",
+ help="Instruct the PDF viewer to resize the window to fit the page " "size",
+ )
+ viewerargs.add_argument(
+ "--viewer-center-window",
+ action="store_true",
+ help="Instruct the PDF viewer to center the PDF viewer window",
+ )
+ viewerargs.add_argument(
+ "--viewer-fullscreen",
+ action="store_true",
+ help="Instruct the PDF viewer to open the PDF in fullscreen mode",
+ )
+
+ args = parser.parse_args(argv[1:])
+
+ if args.verbose:
+ logging.basicConfig(level=logging.DEBUG)
+
+ if args.pillow_limit_break:
+ Image.MAX_IMAGE_PIXELS = None
+
+ layout_fun = get_layout_fun(
+ args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient
+ )
+
+ # if no positional arguments were supplied, read a single image from
+ # standard input
+ if len(args.images) == 0:
+ logging.info("reading image from standard input")
+ try:
+ if PY3:
+ args.images = [sys.stdin.buffer.read()]
+ else:
+ args.images = [sys.stdin.read()]
+ except KeyboardInterrupt:
+ exit(0)
+
+ # with the number of pages being equal to the number of images, the
+ # value passed to --viewer-initial-page must be between 1 and that number
+ if args.viewer_initial_page is not None:
+ if args.viewer_initial_page < 1:
+ parser.print_usage(file=sys.stderr)
+ logging.error(
+ "%s: error: argument --viewer-initial-page: must be "
+ "greater than zero" % parser.prog
+ )
+ exit(2)
+ if args.viewer_initial_page > len(args.images):
+ parser.print_usage(file=sys.stderr)
+ logging.error(
+ "%s: error: argument --viewer-initial-page: must be "
+ "less than or equal to the total number of pages" % parser.prog
+ )
+ exit(2)
+
+ try:
+ convert(
+ *args.images,
+ title=args.title,
+ author=args.author,
+ creator=args.creator,
+ producer=args.producer,
+ creationdate=args.creationdate,
+ moddate=args.moddate,
+ subject=args.subject,
+ keywords=args.keywords,
+ colorspace=args.colorspace,
+ nodate=args.nodate,
+ layout_fun=layout_fun,
+ viewer_panes=args.viewer_panes,
+ viewer_initial_page=args.viewer_initial_page,
+ viewer_magnification=args.viewer_magnification,
+ viewer_page_layout=args.viewer_page_layout,
+ viewer_fit_window=args.viewer_fit_window,
+ viewer_center_window=args.viewer_center_window,
+ viewer_fullscreen=args.viewer_fullscreen,
+ with_pdfrw=not args.without_pdfrw,
+ outputstream=args.output,
+ first_frame_only=args.first_frame_only
+ )
+ except Exception as e:
+ logging.error("error: " + str(e))
+ if logging.getLogger().isEnabledFor(logging.DEBUG):
+ import traceback
+
+ traceback.print_exc(file=sys.stderr)
+ exit(1)
+
+
+if __name__ == "__main__":
+ main()