summary refs log tree commit diff
path: root/src/img2pdf.py
diff options
context:
space:
mode:
author Johannes Schauer Marin Rodrigues <josch@debian.org> 2023-10-28 10:21:25 +0200
committer Johannes Schauer Marin Rodrigues <josch@debian.org> 2023-10-28 10:21:25 +0200
commit d6d4451cf308865725d044a4331bda9ba7ec066d (patch)
tree 378aa1e2bc45e361ca5aedff0a841bd41a4d5341 /src/img2pdf.py
parent 7abe2f2f089f38a0ba403da8f1459f5c6bf2ffa6 (diff)
New upstream version 0.5.0
Diffstat (limited to 'src/img2pdf.py')
-rwxr-xr-x src/img2pdf.py 506
1 files changed, 453 insertions, 53 deletions
diff --git a/src/img2pdf.py b/src/img2pdf.py
index 39a311b..036232b 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -22,7 +22,7 @@ import sys
import os
import zlib
import argparse
-from PIL import Image, TiffImagePlugin, GifImagePlugin
+from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms
if hasattr(GifImagePlugin, "LoadingStrategy"):
# Pillow 9.0.0 started emitting all frames but the first as RGB instead of
@@ -36,8 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"):
# TiffImagePlugin.DEBUG = True
from PIL.ExifTags import TAGS
-from datetime import datetime
-from jp2 import parsejp2
+from datetime import datetime, timezone
+import jp2
from enum import Enum
from io import BytesIO
import logging
@@ -45,6 +45,8 @@ import struct
import platform
import hashlib
from itertools import chain
+import re
+import io
logger = logging.getLogger(__name__)
@@ -60,7 +62,7 @@ try:
except ImportError:
have_pikepdf = False
-__version__ = "0.4.4"
+__version__ = "0.5.0"
default_dpi = 96.0
papersizes = {
"letter": "8.5inx11in",
@@ -125,7 +127,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape")
Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other")
-ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other")
+ImageFormat = Enum(
+ "ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other"
+)
PageMode = Enum("PageMode", "none outlines thumbs")
@@ -442,7 +446,7 @@ class temp_attr:
if hasattr(self.obj, self.field):
self.exists = True
self.old_value = getattr(self.obj, self.field)
- print(f"setting {self.obj}.{self.field} = {self.value}")
+ logger.debug(f"setting {self.obj}.{self.field} = {self.value}")
setattr(self.obj, self.field, self.value)
def __exit__(self, exctype, excinst, exctb):
@@ -718,7 +722,7 @@ class pdfdoc(object):
self.writer.docinfo = PdfDict(indirect=True)
def datetime_to_pdfdate(dt):
- return dt.strftime("%Y%m%d%H%M%SZ")
+ return dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ")
for k in ["Title", "Author", "Creator", "Producer", "Subject"]:
v = locals()[k.lower()]
@@ -728,7 +732,7 @@ class pdfdoc(object):
v = PdfString.encode(v)
self.writer.docinfo[getattr(PdfName, k)] = v
- now = datetime.now()
+ now = datetime.now().astimezone()
for k in ["CreationDate", "ModDate"]:
v = locals()[k.lower()]
if v is None and nodate:
@@ -748,7 +752,7 @@ class pdfdoc(object):
)
def datetime_to_xmpdate(dt):
- return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
+ return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?>
<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'>
@@ -823,8 +827,10 @@ class pdfdoc(object):
artborder=None,
iccp=None,
):
- assert (color != Colorspace.RGBA and color != Colorspace.LA) or (
- imgformat == ImageFormat.PNG and smaskdata is not None
+ assert (
+ color not in [Colorspace.RGBA, Colorspace.LA]
+ or (imgformat == ImageFormat.PNG and smaskdata is not None)
+ or imgformat == ImageFormat.JPEG2000
)
if self.engine == Engine.pikepdf:
@@ -848,7 +854,13 @@ class pdfdoc(object):
if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA:
colorspace = PdfName.DeviceGray
elif color == Colorspace.RGB or color == Colorspace.RGBA:
- colorspace = PdfName.DeviceRGB
+ if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000:
+ # there is no DeviceRGBA and for JPXDecode it is okay to have
+ # no colorspace as the pdf reader is supposed to get this info
+ # from the jpeg2000 payload itself
+ colorspace = None
+ else:
+ colorspace = PdfName.DeviceRGB
elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]:
colorspace = PdfName.DeviceCMYK
elif color == Colorspace.P:
@@ -919,7 +931,8 @@ class pdfdoc(object):
image[PdfName.Filter] = ofilter
image[PdfName.Width] = imgwidthpx
image[PdfName.Height] = imgheightpx
- image[PdfName.ColorSpace] = colorspace
+ if colorspace is not None:
+ image[PdfName.ColorSpace] = colorspace
image[PdfName.BitsPerComponent] = depth
smask = None
@@ -1256,8 +1269,11 @@ class pdfdoc(object):
# now write out the PDF
if self.engine == Engine.pikepdf:
+ kwargs = {}
+ if pikepdf.__version__ >= "6.2.0":
+ kwargs["deterministic_id"] = True
self.writer.save(
- outputstream, min_version=self.output_version, linearize=True
+ outputstream, min_version=self.output_version, linearize=True, **kwargs
)
elif self.engine == Engine.pdfrw:
self.writer.trailer.Info = self.writer.docinfo
@@ -1285,7 +1301,7 @@ def get_imgmetadata(
if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None:
# this codepath gets called if the PIL installation is not able to
# handle JPEG2000 files
- imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata)
+ imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata)
if hdpi is None:
hdpi = default_dpi
@@ -1305,7 +1321,7 @@ def get_imgmetadata(
ics = imgdata.mode
# GIF and PNG files with transparency are supported
- if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and (
+ if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and (
ics in ["RGBA", "LA"] or "transparency" in imgdata.info
):
# Must check the IHDR chunk for the bit depth, because PIL would lossily
@@ -1315,6 +1331,10 @@ def get_imgmetadata(
if depth > 8:
logger.warning("Image with transparency and a bit depth of %d." % depth)
logger.warning("This is unsupported due to PIL limitations.")
+ logger.warning(
+ "If you accept a lossy conversion, you can manually convert "
+ "your images to 8 bit using `convert -depth 8` from imagemagick"
+ )
raise AlphaChannelError(
"Refusing to work with multiple >8bit channels."
)
@@ -1425,6 +1445,53 @@ def get_imgmetadata(
iccp = None
if "icc_profile" in imgdata.info:
iccp = imgdata.info.get("icc_profile")
+ # GIMP saves bilevel TIFF images and palette PNG images with only black and
+ # white in the palette with an RGB ICC profile which is useless
+ # https://gitlab.gnome.org/GNOME/gimp/-/issues/3438
+ # and produces an error in Adobe Acrobat, so we ignore it with a warning.
+ # imagemagick also used to (wrongly) include an RGB ICC profile for bilevel
+ # images: https://github.com/ImageMagick/ImageMagick/issues/2070
+ if iccp is not None and (
+ (color == Colorspace["1"] and imgformat == ImageFormat.TIFF)
+ or (
+ imgformat == ImageFormat.PNG
+ and color == Colorspace.P
+ and rawdata is not None
+ and parse_png(rawdata)[1]
+ in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"]
+ )
+ ):
+ with io.BytesIO(iccp) as f:
+ prf = ImageCms.ImageCmsProfile(f)
+ if (
+ prf.profile.model == "sRGB"
+ and prf.profile.manufacturer == "GIMP"
+ and prf.profile.profile_description == "GIMP built-in sRGB"
+ ):
+ if imgformat == ImageFormat.TIFF:
+ logger.warning(
+ "Ignoring RGB ICC profile in bilevel TIFF produced by GIMP."
+ )
+ elif imgformat == ImageFormat.PNG:
+ logger.warning(
+ "Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP."
+ )
+ logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438")
+ iccp = None
+ # SmartAlbums old version (found 2.2.6) exports JPG with only 1 component
+ # with an RGB ICC profile which is useless.
+ # This produces an error in Adobe Acrobat, so we ignore it with a warning.
+ # Update: Found another case, the JPG is created by Adobe PhotoShop, so we
+ # don't check software anymore.
+ if iccp is not None and (
+ (color == Colorspace["L"] and imgformat == ImageFormat.JPEG)
+ ):
+ with io.BytesIO(iccp) as f:
+ prf = ImageCms.ImageCmsProfile(f)
+
+ if prf.profile.xcolor_space not in ("GRAY"):
+ logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG")
+ iccp = None
logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx)
@@ -1533,7 +1600,204 @@ def parse_png(rawdata):
return pngidat, palette
-def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
+miff_re = re.compile(
+ r"""
+ [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space
+ [^=]+ # the field name can even contain spaces
+ = # field name and value are separated by an equal sign
+ (?:
+ [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars
+ |{[^}]*} # or any kind of char surrounded by braces
+ )+""",
+ re.VERBOSE,
+)
+
+# https://imagemagick.org/script/miff.php
+# turn off black formatting until python 3.10 is available on more platforms
+# and we can use match/case
+# fmt: off
+def parse_miff(data):
+ results = []
+ header, rest = data.split(b":\x1a", 1)
+ header = header.decode("ISO-8859-1")
+ assert header.lower().startswith("id=imagemagick")
+ hdata = {}
+ for i, line in enumerate(re.findall(miff_re, header)):
+ if not line:
+ continue
+ k, v = line.split("=", 1)
+ if i == 0:
+ assert k.lower() == "id"
+ assert v.lower() == "imagemagick"
+ #match k.lower():
+ # case "class":
+ if k.lower() == "class":
+ #match v:
+ # case "DirectClass" | "PseudoClass":
+ if v in ["DirectClass", "PseudoClass"]:
+ hdata["class"] = v
+ # case _:
+ else:
+ print("cannot understand class", v)
+ # case "colorspace":
+ elif k.lower() == "colorspace":
+ # theoretically RGBA and CMYKA should be supported as well
+ # please teach me how to create such a MIFF file
+ #match v:
+ # case "sRGB" | "CMYK" | "Gray":
+ if v in ["sRGB", "CMYK", "Gray"]:
+ hdata["colorspace"] = v
+ # case _:
+ else:
+ print("cannot understand colorspace", v)
+ # case "depth":
+ elif k.lower() == "depth":
+ #match v:
+ # case "8" | "16" | "32":
+ if v in ["8", "16", "32"]:
+ hdata["depth"] = int(v)
+ # case _:
+ else:
+ print("cannot understand depth", v)
+ # case "colors":
+ elif k.lower() == "colors":
+ hdata["colors"] = int(v)
+ # case "matte":
+ elif k.lower() == "matte":
+ #match v:
+ # case "True":
+ if v == "True":
+ hdata["matte"] = True
+ # case "False":
+ elif v == "False":
+ hdata["matte"] = False
+ # case _:
+ else:
+ print("cannot understand matte", v)
+ # case "columns" | "rows":
+ elif k.lower() in ["columns", "rows"]:
+ hdata[k.lower()] = int(v)
+ # case "compression":
+ elif k.lower() == "compression":
+ print("compression not yet supported")
+ # case "profile":
+ elif k.lower() == "profile":
+ assert v in ["icc", "exif"]
+ hdata["profile"] = v
+ # case "resolution":
+ elif k.lower() == "resolution":
+ dpix, dpiy = v.split("x", 1)
+ hdata["resolution"] = (float(dpix), float(dpiy))
+
+ assert "depth" in hdata
+ assert "columns" in hdata
+ assert "rows" in hdata
+ #match hdata["class"]:
+ # case "DirectClass":
+ if hdata["class"] == "DirectClass":
+ if "colors" in hdata:
+ assert hdata["colors"] == 0
+ #match hdata["colorspace"]:
+ # case "sRGB":
+ if hdata["colorspace"] == "sRGB":
+ numchannels = 3
+ colorspace = Colorspace.RGB
+ # case "CMYK":
+ elif hdata["colorspace"] == "CMYK":
+ numchannels = 4
+ colorspace = Colorspace.CMYK
+ # case "Gray":
+ elif hdata["colorspace"] == "Gray":
+ numchannels = 1
+ colorspace = Colorspace.L
+ if hdata.get("matte"):
+ numchannels += 1
+ if hdata.get("profile"):
+ # there is no key encoding the length of icc or exif data
+ # according to the docs, the profile-icc key is supposed to do this
+ print("FAIL: exif")
+ else:
+ lenimgdata = (
+ hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"]
+ )
+ assert len(rest) >= lenimgdata, (
+ len(rest),
+ hdata["depth"],
+ numchannels,
+ hdata["columns"],
+ hdata["rows"],
+ lenimgdata,
+ )
+ if colorspace == Colorspace.RGB and hdata["depth"] == 8:
+ newimg = Image.frombytes("RGB", (hdata["columns"], hdata["rows"]), rest[:lenimgdata])
+ imgdata, palette, depth = to_png_data(newimg)
+ assert palette == b""
+ assert depth == hdata["depth"]
+ imgfmt = ImageFormat.PNG
+ else:
+ imgdata = zlib.compress(rest[:lenimgdata])
+ imgfmt = ImageFormat.MIFF
+ results.append(
+ (
+ colorspace,
+ hdata.get("resolution") or (default_dpi, default_dpi),
+ imgfmt,
+ imgdata,
+ None, # smask
+ hdata["columns"],
+ hdata["rows"],
+ [], # palette
+ False, # inverted
+ hdata["depth"],
+ 0, # rotation
+ None, # icc profile
+ )
+ )
+ if len(rest) > lenimgdata:
+ # another image is here
+ assert rest[lenimgdata:][:14].lower() == b"id=imagemagick"
+ results.extend(parse_miff(rest[lenimgdata:]))
+ # case "PseudoClass":
+ elif hdata["class"] == "PseudoClass":
+ assert "colors" in hdata
+ if hdata.get("matte"):
+ numchannels = 2
+ else:
+ numchannels = 1
+ lenpal = 3 * hdata["colors"] * hdata["depth"] // 8
+ lenimgdata = numchannels * hdata["rows"] * hdata["columns"]
+ assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata)
+ results.append(
+ (
+ Colorspace.RGB,
+ hdata.get("resolution") or (default_dpi, default_dpi),
+ ImageFormat.MIFF,
+ zlib.compress(rest[lenpal : lenpal + lenimgdata]),
+ None, # FIXME: allow alpha channel smask
+ hdata["columns"],
+ hdata["rows"],
+ rest[:lenpal], # palette
+ False, # inverted
+ hdata["depth"],
+ 0, # rotation
+ None, # icc profile
+ )
+ )
+ if len(rest) > lenpal + lenimgdata:
+ # another image is here
+ assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", (
+ len(rest),
+ lenpal,
+ lenimgdata,
+ )
+ results.extend(parse_miff(rest[lenpal + lenimgdata :]))
+ return results
+# fmt: on
+
+
+def read_images(
+ rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False
+):
im = BytesIO(rawdata)
im.seek(0)
imgdata = None
@@ -1541,13 +1805,19 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgdata = Image.open(im)
except IOError as e:
# test if it is a jpeg2000 image
- if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A":
+ # image is jpeg2000
+ imgformat = ImageFormat.JPEG2000
+ if rawdata[:14].lower() == b"id=imagemagick":
+ # image is in MIFF format
+ # this is useful for 16 bit CMYK because PNG cannot do CMYK and thus
+ # we need PIL but PIL cannot do 16 bit
+ imgformat = ImageFormat.MIFF
+ else:
raise ImageOpenError(
"cannot read input image (not jpeg2000). "
"PIL: error reading image: %s" % e
)
- # image is jpeg2000
- imgformat = ImageFormat.JPEG2000
else:
logger.debug("PIL format = %s", imgdata.format)
imgformat = None
@@ -1581,10 +1851,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
raise JpegColorspaceError("jpeg can't be monochrome")
if color == Colorspace["P"]:
raise JpegColorspaceError("jpeg can't have a color palette")
- if color == Colorspace["RGBA"]:
+ if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000:
raise JpegColorspaceError("jpeg can't have an alpha channel")
logger.debug("read_images() embeds a JPEG")
cleanup()
+ depth = 8
+ if imgformat == ImageFormat.JPEG2000:
+ *_, depth = jp2.parse(rawdata)
return [
(
color,
@@ -1596,7 +1869,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
imgheightpx,
[],
False,
- 8,
+ depth,
rotation,
iccp,
)
@@ -1613,6 +1886,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
if imgformat == ImageFormat.MPO:
result = []
img_page_count = 0
+ assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002])
+ num_frames = len(imgdata.mpinfo[0xB002])
+ # An MPO file can be a main image together with one or more thumbnails.
+ # If that is the case, then we only include all frames if the
+ # --include-thumbnails option is given. If it is not, such an MPO file
+ # will be embedded as is, so including its thumbnails but showing up
+ # as a single image page in the resulting PDF.
+ num_main_frames = 0
+ num_thumbnail_frames = 0
+ for i, mpent in enumerate(imgdata.mpinfo[0xB002]):
+ # check only the first frame for being the main image
+ if (
+ i == 0
+ and mpent["Attribute"]["DependentParentImageFlag"]
+ and not mpent["Attribute"]["DependentChildImageFlag"]
+ and mpent["Attribute"]["RepresentativeImageFlag"]
+ and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image"
+ ):
+ num_main_frames += 1
+ elif (
+ not mpent["Attribute"]["DependentParentImageFlag"]
+ and mpent["Attribute"]["DependentChildImageFlag"]
+ and not mpent["Attribute"]["RepresentativeImageFlag"]
+ and mpent["Attribute"]["MPType"]
+ in [
+ "Large Thumbnail (VGA Equivalent)",
+ "Large Thumbnail (Full HD Equivalent)",
+ ]
+ ):
+ num_thumbnail_frames += 1
+ logger.debug(f"number of frames: {num_frames}")
+ logger.debug(f"number of main frames: {num_main_frames}")
+ logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}")
+ # this MPO file is a main image plus zero or more thumbnails
+ # embed as-is unless the --include-thumbnails option was given
+ if num_frames == 1 or (
+ not include_thumbnails
+ and num_main_frames == 1
+ and num_thumbnail_frames + 1 == num_frames
+ ):
+ color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata(
+ imgdata, imgformat, default_dpi, colorspace, rawdata, rot
+ )
+ if color == Colorspace["1"]:
+ raise JpegColorspaceError("jpeg can't be monochrome")
+ if color == Colorspace["P"]:
+ raise JpegColorspaceError("jpeg can't have a color palette")
+ if color == Colorspace["RGBA"]:
+ raise JpegColorspaceError("jpeg can't have an alpha channel")
+ logger.debug("read_images() embeds an MPO verbatim")
+ cleanup()
+ return [
+ (
+ color,
+ ndpi,
+ ImageFormat.JPEG,
+ rawdata,
+ None,
+ imgwidthpx,
+ imgheightpx,
+ [],
+ False,
+ 8,
+ rotation,
+ iccp,
+ )
+ ]
+ # If the control flow reaches here, the MPO has more than a single
+ # frame but was not detected to be a main image followed by multiple
+ # thumbnails. We thus treat this MPO as we do other multi-frame images
+ # and include all its frames as individual pages.
for offset, mpent in zip(
imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002]
):
@@ -1710,6 +2054,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
]
+ if imgformat == ImageFormat.MIFF:
+ return parse_miff(rawdata)
+
# If our input is not JPEG or PNG, then we might have a format that
# supports multiple frames (like TIFF or GIF), so we need a loop to
# iterate through all frames of the image.
@@ -1875,7 +2222,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
)
)
else:
- if (
+ if color in [Colorspace.P, Colorspace.PA] and iccp is not None:
+ # PDF does not support palette images with icc profile
+ if color == Colorspace.P:
+ newcolor = Colorspace.RGB
+ newimg = newimg.convert(mode="RGB")
+ elif color == Colorspace.PA:
+ newcolor = Colorspace.RGBA
+ newimg = newimg.convert(mode="RGBA")
+ smaskidat = None
+ elif (
color == Colorspace.RGBA
or color == Colorspace.LA
or color == Colorspace.PA
@@ -1889,25 +2245,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None):
newcolor = color
l, a = newimg.split()
newimg = l
+ elif color == Colorspace.PA or (
+ color == Colorspace.P and "transparency" in newimg.info
+ ):
+ newcolor = color
+ a = newimg.convert(mode="RGBA").split()[-1]
else:
newcolor = Colorspace.RGBA
r, g, b, a = newimg.convert(mode="RGBA").split()
newimg = Image.merge("RGB", (r, g, b))
- smaskidat, _, _ = to_png_data(a)
+ smaskidat, *_ = to_png_data(a)
logger.warning(
"Image contains an alpha channel. Computing a separate "
"soft mask (/SMask) image to store transparency in PDF."
)
- elif color in [Colorspace.P, Colorspace.PA] and iccp is not None:
- # PDF does not support palette images with icc profile
- if color == Colorspace.P:
- newcolor = Colorspace.RGB
- newimg = newimg.convert(mode="RGB")
- elif color == Colorspace.PA:
- newcolor = Colorspace.RGBA
- newimg = newimg.convert(mode="RGBA")
- smaskidat = None
else:
newcolor = color
smaskidat = None
@@ -2249,7 +2601,6 @@ def find_scale(pagewidth, pageheight):
# as a binary string representing the image content or as filenames to the
# images.
def convert(*images, **kwargs):
-
_default_kwargs = dict(
engine=None,
title=None,
@@ -2279,6 +2630,7 @@ def convert(*images, **kwargs):
artborder=None,
pdfa=None,
rotation=None,
+ include_thumbnails=False,
)
for kwname, default in _default_kwargs.items():
if kwname not in kwargs:
@@ -2322,11 +2674,16 @@ def convert(*images, **kwargs):
for img in images:
# img is allowed to be a path, a binary string representing image data
# or a file-like object (really anything that implements read())
- try:
- rawdata = img.read()
- except AttributeError:
+ # or a pathlib.Path object (really anything that implements read_bytes())
+ rawdata = None
+ for fun in "read", "read_bytes":
+ try:
+ rawdata = getattr(img, fun)()
+ except AttributeError:
+ pass
+ if rawdata is None:
if not isinstance(img, (str, bytes)):
- raise TypeError("Neither implements read() nor is str or bytes")
+ raise TypeError("Neither read(), read_bytes() nor is str or bytes")
# the thing doesn't have a read() function, so try if we can treat
# it as a file name
try:
@@ -2344,6 +2701,10 @@ def convert(*images, **kwargs):
rawdata = f.read()
f.close()
+ # md5 = hashlib.md5(rawdata).hexdigest()
+ # with open("./testdata/" + md5, "wb") as f:
+ # f.write(rawdata)
+
for (
color,
ndpi,
@@ -2362,6 +2723,7 @@ def convert(*images, **kwargs):
kwargs["colorspace"],
kwargs["first_frame_only"],
kwargs["rotation"],
+ kwargs["include_thumbnails"],
):
pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"](
imgwidthpx, imgheightpx, ndpi
@@ -2737,7 +3099,7 @@ def valid_date(string):
else:
try:
return parser.parse(string)
- except TypeError:
+ except:
pass
# as a last resort, try the local date utility
try:
@@ -2750,7 +3112,7 @@ def valid_date(string):
except subprocess.CalledProcessError:
pass
else:
- return datetime.utcfromtimestamp(int(utime))
+ return datetime.fromtimestamp(int(utime))
raise argparse.ArgumentTypeError("cannot parse date: %s" % string)
@@ -3452,7 +3814,18 @@ def gui():
app.mainloop()
-def main(argv=sys.argv):
+def get_default_icc_profile():
+ for profile in [
+ "/usr/share/color/icc/sRGB.icc",
+ "/usr/share/color/icc/OpenICC/sRGB.icc",
+ "/usr/share/color/icc/colord/sRGB.icc",
+ ]:
+ if os.path.exists(profile):
+ return profile
+ return "/usr/share/color/icc/sRGB.icc"
+
+
+def get_main_parser():
rendered_papersizes = ""
for k, v in sorted(papersizes.items()):
rendered_papersizes += " %-8s %s\n" % (papernames[k], v)
@@ -3493,7 +3866,9 @@ Paper sizes:
the value in the second column has the same effect as giving the short hand
in the first column. Appending ^T (a caret/circumflex followed by the letter
T) turns the paper size from portrait into landscape. The postfix thus
- symbolizes the transpose. The values are case insensitive.
+ symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is
+ the escape character, so you need to put quotes around the option value.
+ The values are case insensitive.
%s
@@ -3560,7 +3935,7 @@ Examples:
while preserving its aspect ratio and a print border of 2 cm on the top and
bottom and 2.5 cm on the left and right hand side.
- $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg
+ $ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg
On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the
original image size if the image is smaller than that.
@@ -3696,6 +4071,17 @@ RGB.""",
)
outargs.add_argument(
+ "--include-thumbnails",
+ action="store_true",
+ help="Some multi-frame formats like MPO carry a main image and "
+ "one or more scaled-down copies of the main image (thumbnails). "
+ "In such a case, img2pdf will only include the main image and "
+ "not create additional pages for each of the thumbnails. If this "
+ "option is set, img2pdf will instead create one page per frame and "
+ "thus store each thumbnail on its own page.",
+ )
+
+ outargs.add_argument(
"--pillow-limit-break",
action="store_true",
help="img2pdf uses the Python Imaging Library Pillow to read input "
@@ -3706,14 +4092,20 @@ RGB.""",
% Image.MAX_IMAGE_PIXELS,
)
- outargs.add_argument(
- "--pdfa",
- nargs="?",
- const="/usr/share/color/icc/sRGB.icc",
- default=None,
- help="Output a PDF/A-1b compliant document. By default, this will "
- "embed /usr/share/color/icc/sRGB.icc as the color profile.",
- )
+ if sys.platform == "win32":
+ pass
+ else:
+ outargs.add_argument(
+ "--pdfa",
+ nargs="?",
+ const=get_default_icc_profile(),
+ default=None,
+ help="Output a PDF/A-1b compliant document. By default, this will "
+ "embed either /usr/share/color/icc/sRGB.icc, "
+ "/usr/share/color/icc/OpenICC/sRGB.icc or "
+ "/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever "
+ "is found to exist first.",
+ )
sizeargs = parser.add_argument_group(
title="Image and page size and layout arguments",
@@ -4002,8 +4394,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
action="store_true",
help="Instruct the PDF viewer to open the PDF in fullscreen mode",
)
+ return parser
- args = parser.parse_args(argv[1:])
+
+def main(argv=sys.argv):
+ args = get_main_parser().parse_args(argv[1:])
if args.verbose:
logging.basicConfig(level=logging.DEBUG)
@@ -4027,7 +4422,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
elif len(args.images) == 0 and len(args.from_file) == 0:
# if no positional arguments were supplied, read a single image from
# standard input
- logger.info("reading image from standard input")
+ print(
+ "Reading image from standard input...\n"
+ "Re-run with -h or --help for usage information.",
+ file=sys.stderr,
+ )
try:
images = [sys.stdin.buffer.read()]
except KeyboardInterrupt:
@@ -4088,6 +4487,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders.
artborder=args.art_border,
pdfa=args.pdfa,
rotation=args.rotation,
+ include_thumbnails=args.include_thumbnails,
)
except Exception as e:
logger.error("error: " + str(e))