diff options
author | Johannes Schauer Marin Rodrigues <josch@debian.org> | 2023-10-28 10:21:25 +0200 |
---|---|---|
committer | Johannes Schauer Marin Rodrigues <josch@debian.org> | 2023-10-28 10:21:25 +0200 |
commit | d6d4451cf308865725d044a4331bda9ba7ec066d (patch) | |
tree | 378aa1e2bc45e361ca5aedff0a841bd41a4d5341 /src/img2pdf.py | |
parent | 7abe2f2f089f38a0ba403da8f1459f5c6bf2ffa6 (diff) |
New upstream version 0.5.0
Diffstat (limited to 'src/img2pdf.py')
-rwxr-xr-x | src/img2pdf.py | 506 |
1 files changed, 453 insertions, 53 deletions
diff --git a/src/img2pdf.py b/src/img2pdf.py index 39a311b..036232b 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -22,7 +22,7 @@ import sys import os import zlib import argparse -from PIL import Image, TiffImagePlugin, GifImagePlugin +from PIL import Image, TiffImagePlugin, GifImagePlugin, ImageCms if hasattr(GifImagePlugin, "LoadingStrategy"): # Pillow 9.0.0 started emitting all frames but the first as RGB instead of @@ -36,8 +36,8 @@ if hasattr(GifImagePlugin, "LoadingStrategy"): # TiffImagePlugin.DEBUG = True from PIL.ExifTags import TAGS -from datetime import datetime -from jp2 import parsejp2 +from datetime import datetime, timezone +import jp2 from enum import Enum from io import BytesIO import logging @@ -45,6 +45,8 @@ import struct import platform import hashlib from itertools import chain +import re +import io logger = logging.getLogger(__name__) @@ -60,7 +62,7 @@ try: except ImportError: have_pikepdf = False -__version__ = "0.4.4" +__version__ = "0.5.0" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -125,7 +127,9 @@ PageOrientation = Enum("PageOrientation", "portrait landscape") Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other") -ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other") +ImageFormat = Enum( + "ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO MIFF other" +) PageMode = Enum("PageMode", "none outlines thumbs") @@ -442,7 +446,7 @@ class temp_attr: if hasattr(self.obj, self.field): self.exists = True self.old_value = getattr(self.obj, self.field) - print(f"setting {self.obj}.{self.field} = {self.value}") + logger.debug(f"setting {self.obj}.{self.field} = {self.value}") setattr(self.obj, self.field, self.value) def __exit__(self, exctype, excinst, exctb): @@ -718,7 +722,7 @@ class pdfdoc(object): self.writer.docinfo = PdfDict(indirect=True) def datetime_to_pdfdate(dt): - return dt.strftime("%Y%m%d%H%M%SZ") + return 
dt.astimezone(tz=timezone.utc).strftime("%Y%m%d%H%M%SZ") for k in ["Title", "Author", "Creator", "Producer", "Subject"]: v = locals()[k.lower()] @@ -728,7 +732,7 @@ class pdfdoc(object): v = PdfString.encode(v) self.writer.docinfo[getattr(PdfName, k)] = v - now = datetime.now() + now = datetime.now().astimezone() for k in ["CreationDate", "ModDate"]: v = locals()[k.lower()] if v is None and nodate: @@ -748,7 +752,7 @@ class pdfdoc(object): ) def datetime_to_xmpdate(dt): - return dt.strftime("%Y-%m-%dT%H:%M:%SZ") + return dt.astimezone(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?> <x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'> @@ -823,8 +827,10 @@ class pdfdoc(object): artborder=None, iccp=None, ): - assert (color != Colorspace.RGBA and color != Colorspace.LA) or ( - imgformat == ImageFormat.PNG and smaskdata is not None + assert ( + color not in [Colorspace.RGBA, Colorspace.LA] + or (imgformat == ImageFormat.PNG and smaskdata is not None) + or imgformat == ImageFormat.JPEG2000 ) if self.engine == Engine.pikepdf: @@ -848,7 +854,13 @@ class pdfdoc(object): if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA: colorspace = PdfName.DeviceGray elif color == Colorspace.RGB or color == Colorspace.RGBA: - colorspace = PdfName.DeviceRGB + if color == Colorspace.RGBA and imgformat == ImageFormat.JPEG2000: + # there is no DeviceRGBA and for JPXDecode it is okay to have + # no colorspace as the pdf reader is supposed to get this info + # from the jpeg2000 payload itself + colorspace = None + else: + colorspace = PdfName.DeviceRGB elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: colorspace = PdfName.DeviceCMYK elif color == Colorspace.P: @@ -919,7 +931,8 @@ class pdfdoc(object): image[PdfName.Filter] = ofilter image[PdfName.Width] = imgwidthpx image[PdfName.Height] = imgheightpx - image[PdfName.ColorSpace] = colorspace + if 
colorspace is not None: + image[PdfName.ColorSpace] = colorspace image[PdfName.BitsPerComponent] = depth smask = None @@ -1256,8 +1269,11 @@ class pdfdoc(object): # now write out the PDF if self.engine == Engine.pikepdf: + kwargs = {} + if pikepdf.__version__ >= "6.2.0": + kwargs["deterministic_id"] = True self.writer.save( - outputstream, min_version=self.output_version, linearize=True + outputstream, min_version=self.output_version, linearize=True, **kwargs ) elif self.engine == Engine.pdfrw: self.writer.trailer.Info = self.writer.docinfo @@ -1285,7 +1301,7 @@ def get_imgmetadata( if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: # this codepath gets called if the PIL installation is not able to # handle JPEG2000 files - imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata) + imgwidthpx, imgheightpx, ics, hdpi, vdpi, channels, bpp = jp2.parse(rawdata) if hdpi is None: hdpi = default_dpi @@ -1305,7 +1321,7 @@ def get_imgmetadata( ics = imgdata.mode # GIF and PNG files with transparency are supported - if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and ( + if imgformat in [ImageFormat.PNG, ImageFormat.GIF, ImageFormat.JPEG2000] and ( ics in ["RGBA", "LA"] or "transparency" in imgdata.info ): # Must check the IHDR chunk for the bit depth, because PIL would lossily @@ -1315,6 +1331,10 @@ def get_imgmetadata( if depth > 8: logger.warning("Image with transparency and a bit depth of %d." % depth) logger.warning("This is unsupported due to PIL limitations.") + logger.warning( + "If you accept a lossy conversion, you can manually convert " + "your images to 8 bit using `convert -depth 8` from imagemagick" + ) raise AlphaChannelError( "Refusing to work with multiple >8bit channels." 
) @@ -1425,6 +1445,53 @@ def get_imgmetadata( iccp = None if "icc_profile" in imgdata.info: iccp = imgdata.info.get("icc_profile") + # GIMP saves bilevel TIFF images and palette PNG images with only black and + # white in the palette with an RGB ICC profile which is useless + # https://gitlab.gnome.org/GNOME/gimp/-/issues/3438 + # and produces an error in Adobe Acrobat, so we ignore it with a warning. + # imagemagick also used to (wrongly) include an RGB ICC profile for bilevel + # images: https://github.com/ImageMagick/ImageMagick/issues/2070 + if iccp is not None and ( + (color == Colorspace["1"] and imgformat == ImageFormat.TIFF) + or ( + imgformat == ImageFormat.PNG + and color == Colorspace.P + and rawdata is not None + and parse_png(rawdata)[1] + in [b"\x00\x00\x00\xff\xff\xff", b"\xff\xff\xff\x00\x00\x00"] + ) + ): + with io.BytesIO(iccp) as f: + prf = ImageCms.ImageCmsProfile(f) + if ( + prf.profile.model == "sRGB" + and prf.profile.manufacturer == "GIMP" + and prf.profile.profile_description == "GIMP built-in sRGB" + ): + if imgformat == ImageFormat.TIFF: + logger.warning( + "Ignoring RGB ICC profile in bilevel TIFF produced by GIMP." + ) + elif imgformat == ImageFormat.PNG: + logger.warning( + "Ignoring RGB ICC profile in 2-color palette PNG produced by GIMP." + ) + logger.warning("https://gitlab.gnome.org/GNOME/gimp/-/issues/3438") + iccp = None + # SmartAlbums old version (found 2.2.6) exports JPG with only 1 component + # with an RGB ICC profile which is useless. + # This produces an error in Adobe Acrobat, so we ignore it with a warning. + # Update: Found another case, the JPG is created by Adobe PhotoShop, so we + # don't check software anymore. 
+ if iccp is not None and ( + (color == Colorspace["L"] and imgformat == ImageFormat.JPEG) + ): + with io.BytesIO(iccp) as f: + prf = ImageCms.ImageCmsProfile(f) + + if prf.profile.xcolor_space not in ("GRAY"): + logger.warning("Ignoring non-GRAY ICC profile in Grayscale JPG") + iccp = None logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) @@ -1533,7 +1600,204 @@ def parse_png(rawdata): return pngidat, palette -def read_images(rawdata, colorspace, first_frame_only=False, rot=None): +miff_re = re.compile( + r""" + [^\x00-\x20\x7f-\x9f] # the field name must not start with a control char or space + [^=]+ # the field name can even contain spaces + = # field name and value are separated by an equal sign + (?: + [^\x00-\x20\x7f-\x9f{}] # either chars that are not braces and not control chars + |{[^}]*} # or any kind of char surrounded by braces + )+""", + re.VERBOSE, +) + +# https://imagemagick.org/script/miff.php +# turn off black formatting until python 3.10 is available on more platforms +# and we can use match/case +# fmt: off +def parse_miff(data): + results = [] + header, rest = data.split(b":\x1a", 1) + header = header.decode("ISO-8859-1") + assert header.lower().startswith("id=imagemagick") + hdata = {} + for i, line in enumerate(re.findall(miff_re, header)): + if not line: + continue + k, v = line.split("=", 1) + if i == 0: + assert k.lower() == "id" + assert v.lower() == "imagemagick" + #match k.lower(): + # case "class": + if k.lower() == "class": + #match v: + # case "DirectClass" | "PseudoClass": + if v in ["DirectClass", "PseudoClass"]: + hdata["class"] = v + # case _: + else: + print("cannot understand class", v) + # case "colorspace": + elif k.lower() == "colorspace": + # theoretically RGBA and CMYKA should be supported as well + # please teach me how to create such a MIFF file + #match v: + # case "sRGB" | "CMYK" | "Gray": + if v in ["sRGB", "CMYK", "Gray"]: + hdata["colorspace"] = v + # case _: + else: + print("cannot understand 
colorspace", v) + # case "depth": + elif k.lower() == "depth": + #match v: + # case "8" | "16" | "32": + if v in ["8", "16", "32"]: + hdata["depth"] = int(v) + # case _: + else: + print("cannot understand depth", v) + # case "colors": + elif k.lower() == "colors": + hdata["colors"] = int(v) + # case "matte": + elif k.lower() == "matte": + #match v: + # case "True": + if v == "True": + hdata["matte"] = True + # case "False": + elif v == "False": + hdata["matte"] = False + # case _: + else: + print("cannot understand matte", v) + # case "columns" | "rows": + elif k.lower() in ["columns", "rows"]: + hdata[k.lower()] = int(v) + # case "compression": + elif k.lower() == "compression": + print("compression not yet supported") + # case "profile": + elif k.lower() == "profile": + assert v in ["icc", "exif"] + hdata["profile"] = v + # case "resolution": + elif k.lower() == "resolution": + dpix, dpiy = v.split("x", 1) + hdata["resolution"] = (float(dpix), float(dpiy)) + + assert "depth" in hdata + assert "columns" in hdata + assert "rows" in hdata + #match hdata["class"]: + # case "DirectClass": + if hdata["class"] == "DirectClass": + if "colors" in hdata: + assert hdata["colors"] == 0 + #match hdata["colorspace"]: + # case "sRGB": + if hdata["colorspace"] == "sRGB": + numchannels = 3 + colorspace = Colorspace.RGB + # case "CMYK": + elif hdata["colorspace"] == "CMYK": + numchannels = 4 + colorspace = Colorspace.CMYK + # case "Gray": + elif hdata["colorspace"] == "Gray": + numchannels = 1 + colorspace = Colorspace.L + if hdata.get("matte"): + numchannels += 1 + if hdata.get("profile"): + # there is no key encoding the length of icc or exif data + # according to the docs, the profile-icc key is supposed to do this + print("FAIL: exif") + else: + lenimgdata = ( + hdata["depth"] // 8 * numchannels * hdata["columns"] * hdata["rows"] + ) + assert len(rest) >= lenimgdata, ( + len(rest), + hdata["depth"], + numchannels, + hdata["columns"], + hdata["rows"], + lenimgdata, + ) + if 
colorspace == Colorspace.RGB and hdata["depth"] == 8: + newimg = Image.frombytes("RGB", (hdata["columns"], hdata["rows"]), rest[:lenimgdata]) + imgdata, palette, depth = to_png_data(newimg) + assert palette == b"" + assert depth == hdata["depth"] + imgfmt = ImageFormat.PNG + else: + imgdata = zlib.compress(rest[:lenimgdata]) + imgfmt = ImageFormat.MIFF + results.append( + ( + colorspace, + hdata.get("resolution") or (default_dpi, default_dpi), + imgfmt, + imgdata, + None, # smask + hdata["columns"], + hdata["rows"], + [], # palette + False, # inverted + hdata["depth"], + 0, # rotation + None, # icc profile + ) + ) + if len(rest) > lenimgdata: + # another image is here + assert rest[lenimgdata:][:14].lower() == b"id=imagemagick" + results.extend(parse_miff(rest[lenimgdata:])) + # case "PseudoClass": + elif hdata["class"] == "PseudoClass": + assert "colors" in hdata + if hdata.get("matte"): + numchannels = 2 + else: + numchannels = 1 + lenpal = 3 * hdata["colors"] * hdata["depth"] // 8 + lenimgdata = numchannels * hdata["rows"] * hdata["columns"] + assert len(rest) >= lenpal + lenimgdata, (len(rest), lenpal, lenimgdata) + results.append( + ( + Colorspace.RGB, + hdata.get("resolution") or (default_dpi, default_dpi), + ImageFormat.MIFF, + zlib.compress(rest[lenpal : lenpal + lenimgdata]), + None, # FIXME: allow alpha channel smask + hdata["columns"], + hdata["rows"], + rest[:lenpal], # palette + False, # inverted + hdata["depth"], + 0, # rotation + None, # icc profile + ) + ) + if len(rest) > lenpal + lenimgdata: + # another image is here + assert rest[lenpal + lenimgdata :][:14].lower() == b"id=imagemagick", ( + len(rest), + lenpal, + lenimgdata, + ) + results.extend(parse_miff(rest[lenpal + lenimgdata :])) + return results +# fmt: on + + +def read_images( + rawdata, colorspace, first_frame_only=False, rot=None, include_thumbnails=False +): im = BytesIO(rawdata) im.seek(0) imgdata = None @@ -1541,13 +1805,19 @@ def read_images(rawdata, colorspace, 
first_frame_only=False, rot=None): imgdata = Image.open(im) except IOError as e: # test if it is a jpeg2000 image - if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": + if rawdata[:12] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": + # image is jpeg2000 + imgformat = ImageFormat.JPEG2000 + if rawdata[:14].lower() == b"id=imagemagick": + # image is in MIFF format + # this is useful for 16 bit CMYK because PNG cannot do CMYK and thus + # we need PIL but PIL cannot do 16 bit + imgformat = ImageFormat.MIFF + else: raise ImageOpenError( "cannot read input image (not jpeg2000). " "PIL: error reading image: %s" % e ) - # image is jpeg2000 - imgformat = ImageFormat.JPEG2000 else: logger.debug("PIL format = %s", imgdata.format) imgformat = None @@ -1581,10 +1851,13 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): raise JpegColorspaceError("jpeg can't be monochrome") if color == Colorspace["P"]: raise JpegColorspaceError("jpeg can't have a color palette") - if color == Colorspace["RGBA"]: + if color == Colorspace["RGBA"] and imgformat != ImageFormat.JPEG2000: raise JpegColorspaceError("jpeg can't have an alpha channel") logger.debug("read_images() embeds a JPEG") cleanup() + depth = 8 + if imgformat == ImageFormat.JPEG2000: + *_, depth = jp2.parse(rawdata) return [ ( color, @@ -1596,7 +1869,7 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): imgheightpx, [], False, - 8, + depth, rotation, iccp, ) @@ -1613,6 +1886,77 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): if imgformat == ImageFormat.MPO: result = [] img_page_count = 0 + assert len(imgdata._MpoImageFile__mpoffsets) == len(imgdata.mpinfo[0xB002]) + num_frames = len(imgdata.mpinfo[0xB002]) + # An MPO file can be a main image together with one or more thumbnails + # if that is the case, then we only include all frames if the + # --include-thumbnails option is given. 
If it is not, such an MPO file + # will be embedded as is, so including its thumbnails but showing up + # as a single image page in the resulting PDF. + num_main_frames = 0 + num_thumbnail_frames = 0 + for i, mpent in enumerate(imgdata.mpinfo[0xB002]): + # check only the first frame for being the main image + if ( + i == 0 + and mpent["Attribute"]["DependentParentImageFlag"] + and not mpent["Attribute"]["DependentChildImageFlag"] + and mpent["Attribute"]["RepresentativeImageFlag"] + and mpent["Attribute"]["MPType"] == "Baseline MP Primary Image" + ): + num_main_frames += 1 + elif ( + not mpent["Attribute"]["DependentParentImageFlag"] + and mpent["Attribute"]["DependentChildImageFlag"] + and not mpent["Attribute"]["RepresentativeImageFlag"] + and mpent["Attribute"]["MPType"] + in [ + "Large Thumbnail (VGA Equivalent)", + "Large Thumbnail (Full HD Equivalent)", + ] + ): + num_thumbnail_frames += 1 + logger.debug(f"number of frames: {num_frames}") + logger.debug(f"number of main frames: {num_main_frames}") + logger.debug(f"number of thumbnail frames: {num_thumbnail_frames}") + # this MPO file is a main image plus zero or more thumbnails + # embed as-is unless the --include-thumbnails option was given + if num_frames == 1 or ( + not include_thumbnails + and num_main_frames == 1 + and num_thumbnail_frames + 1 == num_frames + ): + color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( + imgdata, imgformat, default_dpi, colorspace, rawdata, rot + ) + if color == Colorspace["1"]: + raise JpegColorspaceError("jpeg can't be monochrome") + if color == Colorspace["P"]: + raise JpegColorspaceError("jpeg can't have a color palette") + if color == Colorspace["RGBA"]: + raise JpegColorspaceError("jpeg can't have an alpha channel") + logger.debug("read_images() embeds an MPO verbatim") + cleanup() + return [ + ( + color, + ndpi, + ImageFormat.JPEG, + rawdata, + None, + imgwidthpx, + imgheightpx, + [], + False, + 8, + rotation, + iccp, + ) + ] + # If the control 
flow reaches here, the MPO has more than a single + # frame but was not detected to be a main image followed by multiple + # thumbnails. We thus treat this MPO as we do other multi-frame images + # and include all its frames as individual pages. for offset, mpent in zip( imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002] ): @@ -1710,6 +2054,9 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): ) ] + if imgformat == ImageFormat.MIFF: + return parse_miff(rawdata) + # If our input is not JPEG or PNG, then we might have a format that # supports multiple frames (like TIFF or GIF), so we need a loop to # iterate through all frames of the image. @@ -1875,7 +2222,16 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): ) ) else: - if ( + if color in [Colorspace.P, Colorspace.PA] and iccp is not None: + # PDF does not support palette images with icc profile + if color == Colorspace.P: + newcolor = Colorspace.RGB + newimg = newimg.convert(mode="RGB") + elif color == Colorspace.PA: + newcolor = Colorspace.RGBA + newimg = newimg.convert(mode="RGBA") + smaskidat = None + elif ( color == Colorspace.RGBA or color == Colorspace.LA or color == Colorspace.PA @@ -1889,25 +2245,21 @@ def read_images(rawdata, colorspace, first_frame_only=False, rot=None): newcolor = color l, a = newimg.split() newimg = l + elif color == Colorspace.PA or ( + color == Colorspace.P and "transparency" in newimg.info + ): + newcolor = color + a = newimg.convert(mode="RGBA").split()[-1] else: newcolor = Colorspace.RGBA r, g, b, a = newimg.convert(mode="RGBA").split() newimg = Image.merge("RGB", (r, g, b)) - smaskidat, _, _ = to_png_data(a) + smaskidat, *_ = to_png_data(a) logger.warning( "Image contains an alpha channel. Computing a separate " "soft mask (/SMask) image to store transparency in PDF." 
) - elif color in [Colorspace.P, Colorspace.PA] and iccp is not None: - # PDF does not support palette images with icc profile - if color == Colorspace.P: - newcolor = Colorspace.RGB - newimg = newimg.convert(mode="RGB") - elif color == Colorspace.PA: - newcolor = Colorspace.RGBA - newimg = newimg.convert(mode="RGBA") - smaskidat = None else: newcolor = color smaskidat = None @@ -2249,7 +2601,6 @@ def find_scale(pagewidth, pageheight): # as a binary string representing the image content or as filenames to the # images. def convert(*images, **kwargs): - _default_kwargs = dict( engine=None, title=None, @@ -2279,6 +2630,7 @@ def convert(*images, **kwargs): artborder=None, pdfa=None, rotation=None, + include_thumbnails=False, ) for kwname, default in _default_kwargs.items(): if kwname not in kwargs: @@ -2322,11 +2674,16 @@ def convert(*images, **kwargs): for img in images: # img is allowed to be a path, a binary string representing image data # or a file-like object (really anything that implements read()) - try: - rawdata = img.read() - except AttributeError: + # or a pathlib.Path object (really anything that implements read_bytes()) + rawdata = None + for fun in "read", "read_bytes": + try: + rawdata = getattr(img, fun)() + except AttributeError: + pass + if rawdata is None: if not isinstance(img, (str, bytes)): - raise TypeError("Neither implements read() nor is str or bytes") + raise TypeError("Neither read(), read_bytes() nor is str or bytes") # the thing doesn't have a read() function, so try if we can treat # it as a file name try: @@ -2344,6 +2701,10 @@ def convert(*images, **kwargs): rawdata = f.read() f.close() + # md5 = hashlib.md5(rawdata).hexdigest() + # with open("./testdata/" + md5, "wb") as f: + # f.write(rawdata) + for ( color, ndpi, @@ -2362,6 +2723,7 @@ def convert(*images, **kwargs): kwargs["colorspace"], kwargs["first_frame_only"], kwargs["rotation"], + kwargs["include_thumbnails"], ): pagewidth, pageheight, imgwidthpdf, imgheightpdf = 
kwargs["layout_fun"]( imgwidthpx, imgheightpx, ndpi @@ -2737,7 +3099,7 @@ def valid_date(string): else: try: return parser.parse(string) - except TypeError: + except: pass # as a last resort, try the local date utility try: @@ -2750,7 +3112,7 @@ def valid_date(string): except subprocess.CalledProcessError: pass else: - return datetime.utcfromtimestamp(int(utime)) + return datetime.fromtimestamp(int(utime)) raise argparse.ArgumentTypeError("cannot parse date: %s" % string) @@ -3452,7 +3814,18 @@ def gui(): app.mainloop() -def main(argv=sys.argv): +def get_default_icc_profile(): + for profile in [ + "/usr/share/color/icc/sRGB.icc", + "/usr/share/color/icc/OpenICC/sRGB.icc", + "/usr/share/color/icc/colord/sRGB.icc", + ]: + if os.path.exists(profile): + return profile + return "/usr/share/color/icc/sRGB.icc" + + +def get_main_parser(): rendered_papersizes = "" for k, v in sorted(papersizes.items()): rendered_papersizes += " %-8s %s\n" % (papernames[k], v) @@ -3493,7 +3866,9 @@ Paper sizes: the value in the second column has the same effect as giving the short hand in the first column. Appending ^T (a caret/circumflex followed by the letter T) turns the paper size from portrait into landscape. The postfix thus - symbolizes the transpose. The values are case insensitive. + symbolizes the transpose. Note that on Windows cmd.exe the caret symbol is + the escape character, so you need to put quotes around the option value. + The values are case insensitive. %s @@ -3560,7 +3935,7 @@ Examples: while preserving its aspect ratio and a print border of 2 cm on the top and bottom and 2.5 cm on the left and right hand side. - $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg + $ img2pdf --output out.pdf --pagesize "A4^T" --border 2cm:2.5cm *.jpg On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the original image size if the image is smaller than that. 
@@ -3696,6 +4071,17 @@ RGB.""", ) outargs.add_argument( + "--include-thumbnails", + action="store_true", + help="Some multi-frame formats like MPO carry a main image and " + "one or more scaled-down copies of the main image (thumbnails). " + "In such a case, img2pdf will only include the main image and " + "not create additional pages for each of the thumbnails. If this " + "option is set, img2pdf will instead create one page per frame and " + "thus store each thumbnail on its own page.", + ) + + outargs.add_argument( "--pillow-limit-break", action="store_true", help="img2pdf uses the Python Imaging Library Pillow to read input " @@ -3706,14 +4092,20 @@ RGB.""", % Image.MAX_IMAGE_PIXELS, ) - outargs.add_argument( - "--pdfa", - nargs="?", - const="/usr/share/color/icc/sRGB.icc", - default=None, - help="Output a PDF/A-1b compliant document. By default, this will " - "embed /usr/share/color/icc/sRGB.icc as the color profile.", - ) + if sys.platform == "win32": + pass + else: + outargs.add_argument( + "--pdfa", + nargs="?", + const=get_default_icc_profile(), + default=None, + help="Output a PDF/A-1b compliant document. By default, this will " + "embed either /usr/share/color/icc/sRGB.icc, " + "/usr/share/color/icc/OpenICC/sRGB.icc or " + "/usr/share/color/icc/colord/sRGB.icc as the color profile, whichever " + "is found to exist first.", + ) sizeargs = parser.add_argument_group( title="Image and page size and layout arguments", @@ -4002,8 +4394,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders. action="store_true", help="Instruct the PDF viewer to open the PDF in fullscreen mode", ) + return parser - args = parser.parse_args(argv[1:]) + +def main(argv=sys.argv): + args = get_main_parser().parse_args(argv[1:]) if args.verbose: logging.basicConfig(level=logging.DEBUG) @@ -4027,7 +4422,11 @@ and left/right, respectively. It is not possible to specify asymmetric borders. 
elif len(args.images) == 0 and len(args.from_file) == 0: # if no positional arguments were supplied, read a single image from # standard input - logger.info("reading image from standard input") + print( + "Reading image from standard input...\n" + "Re-run with -h or --help for usage information.", + file=sys.stderr, + ) try: images = [sys.stdin.buffer.read()] except KeyboardInterrupt: @@ -4088,6 +4487,7 @@ and left/right, respectively. It is not possible to specify asymmetric borders. artborder=args.art_border, pdfa=args.pdfa, rotation=args.rotation, + include_thumbnails=args.include_thumbnails, ) except Exception as e: logger.error("error: " + str(e)) |