diff options
author | Johannes Schauer <josch@debian.org> | 2017-01-20 05:49:31 +0100 |
---|---|---|
committer | Johannes Schauer <josch@debian.org> | 2017-01-20 05:49:31 +0100 |
commit | f71d3883871752e9ab72bb175c89a378df2af529 (patch) | |
tree | 98fdde17ba8a53ac5d03fe672289b4d3cdeba2cf | |
parent | d3481fe48afe150f38f331048abe6452b8389723 (diff) |
Import upstream version 0.2.3
-rw-r--r-- | PKG-INFO | 6 | ||||
-rw-r--r-- | setup.py | 4 | ||||
-rw-r--r-- | src/img2pdf.egg-info/PKG-INFO | 6 | ||||
-rw-r--r-- | src/img2pdf.egg-info/SOURCES.txt | 2 | ||||
-rwxr-xr-x | src/img2pdf.py | 173 | ||||
-rw-r--r-- | src/jp2.py | 1 | ||||
-rw-r--r-- | src/tests/__init__.py | 44 | ||||
-rw-r--r-- | src/tests/input/mono.png | bin | 0 -> 444 bytes | |||
-rw-r--r-- | src/tests/output/mono.png.pdf | bin | 0 -> 915 bytes |
9 files changed, 180 insertions, 56 deletions
@@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.1 +Version: 0.2.3 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 Description: img2pdf ======= @@ -157,7 +157,7 @@ Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English @@ -1,6 +1,6 @@ from setuptools import setup -VERSION = "0.2.1" +VERSION = "0.2.3" setup( name='img2pdf', @@ -18,7 +18,7 @@ setup( 'Environment :: Console', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: Implementation :: CPython', 'License :: OSI Approved :: GNU Lesser General Public License v3 ' '(LGPLv3)', diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO index b18e9d6..870fa2d 100644 --- a/src/img2pdf.egg-info/PKG-INFO +++ b/src/img2pdf.egg-info/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: img2pdf -Version: 0.2.1 +Version: 0.2.3 Summary: Convert images to PDF via direct JPEG inclusion. Home-page: https://gitlab.mister-muffin.de/josch/img2pdf Author: Johannes 'josch' Schauer Author-email: josch@mister-muffin.de License: LGPL -Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1 +Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3 Description: img2pdf ======= @@ -157,7 +157,7 @@ Classifier: Intended Audience :: Other Audience Classifier: Environment :: Console Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3) Classifier: Natural Language :: English diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt index 192589d..add31f1 100644 --- a/src/img2pdf.egg-info/SOURCES.txt +++ b/src/img2pdf.egg-info/SOURCES.txt @@ -15,9 +15,11 @@ src/img2pdf.egg-info/top_level.txt src/img2pdf.egg-info/zip-safe src/tests/__init__.py src/tests/input/CMYK.jpg +src/tests/input/mono.png src/tests/input/normal.jpg src/tests/input/normal.png src/tests/output/CMYK.jpg.pdf src/tests/output/CMYK.tif.pdf +src/tests/output/mono.png.pdf src/tests/output/normal.jpg.pdf src/tests/output/normal.png.pdf
\ No newline at end of file diff --git a/src/img2pdf.py b/src/img2pdf.py index 2042d13..20fe784 100755 --- a/src/img2pdf.py +++ b/src/img2pdf.py @@ -28,7 +28,7 @@ from enum import Enum from io import BytesIO import logging -__version__ = "0.2.1" +__version__ = "0.2.3" default_dpi = 96.0 papersizes = { "letter": "8.5inx11in", @@ -58,7 +58,7 @@ PageOrientation = Enum('PageOrientation', 'portrait landscape') Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other') -ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 other') +ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 other') PageMode = Enum('PageMode', 'none outlines thumbs') @@ -167,6 +167,8 @@ class MyPdfDict(object): class MyPdfName(): def __getattr__(self, name): return b'/' + name.encode('ascii') + + MyPdfName = MyPdfName() @@ -314,7 +316,7 @@ class pdfdoc(object): self.info[PdfName.Author] = PdfString.encode(author) if creator is not None: self.info[PdfName.Creator] = PdfString.encode(creator) - if producer is not None: + if producer is not None and producer != "": self.info[PdfName.Producer] = PdfString.encode(producer) if creationdate is not None: self.info[PdfName.CreationDate] = \ @@ -354,14 +356,15 @@ class pdfdoc(object): imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth, pageheight): if self.with_pdfrw: - from pdfrw import PdfDict, PdfName + from pdfrw import PdfDict, PdfName, PdfObject from pdfrw.py23_diffs import convert_load else: PdfDict = MyPdfDict PdfName = MyPdfName + PdfObject = MyPdfObject convert_load = my_convert_load - if color == Colorspace.L: + if color == Colorspace['1'] or color == Colorspace.L: colorspace = PdfName.DeviceGray elif color == Colorspace.RGB: colorspace = PdfName.DeviceRGB @@ -372,11 +375,14 @@ class pdfdoc(object): % color.name) # either embed the whole jpeg or deflate the bitmap representation + logging.debug(imgformat) if imgformat is ImageFormat.JPEG: ofilter = [PdfName.DCTDecode] elif imgformat is ImageFormat.JPEG2000: ofilter = [PdfName.JPXDecode] self.writer.version = "1.5" # jpeg2000 needs pdf 1.5 + elif imgformat is ImageFormat.CCITTGroup4: + ofilter = [PdfName.CCITTFaxDecode] else: ofilter = [PdfName.FlateDecode] @@ -389,12 +395,23 @@ class pdfdoc(object): image[PdfName.Height] = imgheightpx image[PdfName.ColorSpace] = colorspace # hardcoded as PIL doesn't provide bits for non-jpeg formats - image[PdfName.BitsPerComponent] = 8 + if imgformat is ImageFormat.CCITTGroup4: + image[PdfName.BitsPerComponent] = 1 + else: + image[PdfName.BitsPerComponent] = 8 if color == Colorspace['CMYK;I']: # Inverts all four channels image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0] + if imgformat is ImageFormat.CCITTGroup4: + decodeparms = PdfDict() + decodeparms[PdfName.K] = -1 + decodeparms[PdfName.BlackIs1] = PdfObject('true') + decodeparms[PdfName.Columns] = imgwidthpx + decodeparms[PdfName.Rows] = imgheightpx + image[PdfName.DecodeParms] = [decodeparms] + text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii") @@ -594,6 +611,45 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None): return (color, ndpi, imgwidthpx, imgheightpx) +def transcode_monochrome(imgdata): + """Convert the open PIL.Image imgdata to compressed CCITT Group4 data""" + + from PIL import TiffImagePlugin + + logging.debug("Converting monochrome to CCITT Group4") + + # Convert the image to Group 4 in memory. If libtiff is not installed and + # Pillow is not compiled against it, .save() will raise an exception. + newimgio = BytesIO() + imgdata.save(newimgio, format='TIFF', compression='group4') + + # Open new image in memory + newimgio.seek(0) + newimg = Image.open(newimgio) + + # If Pillow is passed an invalid compression argument it will ignore it; + # make sure the image actually got compressed. + if newimg.info['compression'] != 'group4': + raise ValueError("Image not compressed as expected") + + # Read the TIFF tags to find the offset(s) of the compressed data strips. + strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS] + strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] + rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP] + + # PIL always seems to create a single strip even for very large TIFFs when + # it saves images, so assume we only have to read a single strip. + # A test ~10 GPixel image was still encoded as a single strip. Just to be + # safe check throw an error if there is more than one offset. + if len(strip_offsets) > 1: + raise NotImplementedError("Transcoding multiple strips not supported") + + newimgio.seek(strip_offsets[0]) + ccittdata = newimgio.read(strip_bytes[0]) + + return ccittdata + + def read_images(rawdata, colorspace, first_frame_only=False): im = BytesIO(rawdata) im.seek(0) @@ -648,11 +704,20 @@ def read_images(rawdata, colorspace, first_frame_only=False): color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata( imgdata, imgformat, default_dpi, colorspace) - # because we do not support /CCITTFaxDecode + newimg = None if color == Colorspace['1']: - logging.debug("Converting colorspace 1 to L") - newimg = imgdata.convert('L') - color = Colorspace.L + try: + ccittdata = transcode_monochrome(imgdata) + imgformat = ImageFormat.CCITTGroup4 + result.append((color, ndpi, imgformat, ccittdata, + imgwidthpx, imgheightpx)) + img_page_count += 1 + continue + except Exception as e: + logging.debug(e) + logging.debug("Converting colorspace 1 to L") + newimg = imgdata.convert('L') + color = Colorspace.L elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK, Colorspace["CMYK;I"]]: logging.debug("Colorspace is OK: %s", color) @@ -927,12 +992,22 @@ def convert(*images, title=None, viewer_fit_window, viewer_center_window, viewer_fullscreen, with_pdfrw) + # backwards compatibility with older img2pdf versions where the first + # argument to the function had to be given as a list + if len(images) == 1: + # if only one argument was given and it is a list, expand it + if isinstance(images[0], (list, tuple)): + images = images[0] + for img in images: # img is allowed to be a path, a binary string representing image data # or a file-like object (really anything that implements read()) try: rawdata = img.read() except AttributeError: + if not isinstance(img, (str, bytes)): + raise TypeError( + "Neither implements read() nor is str or bytes") # the thing doesn't have a read() function, so try if we can treat # it as a file name try: @@ -1256,10 +1331,11 @@ useful to convert JPEG and JPEG2000 images to PDF. The output is sent to standard output so that it can be redirected into a file or to another program as part of a shell pipe. To directly write the output into a file, use the -o or --output option. + +Options: ''', epilog='''\ -Colorspace - +Colorspace: Currently, the colorspace must be forced for JPEG 2000 images that are not in the RGB colorspace. Available colorspace options are based on Python Imaging Library (PIL) short handles. @@ -1270,8 +1346,7 @@ Colorspace CMYK CMYK color CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe) -Paper sizes - +Paper sizes: You can specify the short hand paper size names shown in the first column in the table below as arguments to the --pagesize and --imgsize options. The width and height they are mapping to is shown in the second column. Giving @@ -1282,8 +1357,7 @@ Paper sizes %s -Fit options - +Fit options: The img2pdf options for the --fit argument are shown in the first column in the table below. The function of these options can be mapped to the geometry operators of imagemagick. For users who are familiar with imagemagick, the @@ -1307,8 +1381,32 @@ Fit options enlarge | < | Y | Enlarges an image with dimensions smaller than the given | | | ones (and otherwise behaves like "into"). -Examples +Argument parsing: + Argument long options can be abbreviated to a prefix if the abbreviation is + anambiguous. That is, the prefix must match a unique option. + + Beware of your shell interpreting argument values as special characters (like + the semicolon in the CMYK;I colorspace option). If in doubt, put the argument + values in single quotes. + + If you want an argument value to start with one or more minus characters, you + must use the long option name and join them with an equal sign like so: + + $ img2pdf --author=--test-- + + If your input file name starts with one or more minus characters, either + separate the input files from the other arguments by two minus signs: + $ img2pdf -- --my-file-starts-with-two-minuses.jpg + + Or be more explicit about its relative path by prepending a ./: + + $ img2pdf ./--my-file-starts-with-two-minuses.jpg + + The order of non-positional arguments (all arguments other than the input + images) does not matter. + +Examples: Lines starting with a dollar sign denote commands you can enter into your terminal. The dollar sign signifies your command prompt. It is not part of the command you type. @@ -1340,31 +1438,9 @@ Examples $ img2pdf --output out.pdf --colorspace L input.jp2 -Argument parsing - - Argument long options can be abbreviated to a prefix if the abbreviation is - anambiguous. That is, the prefix must match a unique option. - - Beware of your shell interpreting argument values as special characters (like - the semicolon in the CMYK;I colorspace option). If in doubt, put the argument - values in single quotes. - - If you want an argument value to start with one or more minus characters, you - must use the long option name and join them with an equal sign like so: - - $ img2pdf --author=--test-- - - If your input file name starts with one or more minus characters, either - separate the input files from the other arguments by two minus signs: - - $ img2pdf -- --my-file-starts-with-two-minuses.jpg +Written by Johannes 'josch' Schauer <josch@mister-muffin.de> - Or be more explicit about its relative path by prepending a ./: - - $ img2pdf ./--my-file-starts-with-two-minuses.jpg - - The order of non-positional arguments (all arguments other than the input - images) does not matter. +Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues ''' % rendered_papersizes) parser.add_argument( @@ -1385,7 +1461,7 @@ Argument parsing outargs = parser.add_argument_group( title='General output arguments', - description='') + description='Arguments controlling the output format.') outargs.add_argument( '-o', '--output', metavar='out', type=argparse.FileType('wb'), @@ -1428,8 +1504,7 @@ RGB.''') sizeargs = parser.add_argument_group( title='Image and page size and layout arguments', description='''\ - -Every input image will be placed on its own page. The image size is controlled +Every input image will be placed on its own page. The image size is controlled by the dpi value of the input image or, if unset or missing, the default dpi of %.2f. By default, each page will have the same size as the image it shows. Thus, there will be no visible border between the image and the page border by @@ -1518,8 +1593,10 @@ of the input image. If the orientation of a page gets flipped, then so do the values set via the --border option. ''') - metaargs = parser.add_argument_group(title='Arguments setting metadata', - description='') + metaargs = parser.add_argument_group( + title='Arguments setting metadata', + description='Options handling embedded timestamps, title and author ' + 'information.') metaargs.add_argument( '--title', metavar='title', type=str, help='Sets the title metadata value') @@ -1532,7 +1609,8 @@ values set via the --border option. metaargs.add_argument( '--producer', metavar='producer', type=str, default="img2pdf " + __version__, - help='Sets the producer metadata value (default is: img2pdf)') + help='Sets the producer metadata value ' + '(default is: img2pdf ' + __version__ + ')') metaargs.add_argument( '--creationdate', metavar='creationdate', type=valid_date, help='Sets the UTC creation date metadata value in YYYY-MM-DD or ' @@ -1646,5 +1724,6 @@ values set via the --border option. traceback.print_exc(file=sys.stderr) exit(1) + if __name__ == '__main__': main() @@ -116,6 +116,7 @@ def parsejp2(data): # retrieving the dpi is optional so we do not error out if not present return (width, height, colorspace, hdpi, vdpi) + if __name__ == "__main__": import sys width, height, colorspace = parsejp2(open(sys.argv[1]).read()) diff --git a/src/tests/__init__.py b/src/tests/__init__.py index b668054..506fc48 100644 --- a/src/tests/__init__.py +++ b/src/tests/__init__.py @@ -4,6 +4,8 @@ import os import img2pdf import zlib from PIL import Image +from io import BytesIO +import struct HERE = os.path.dirname(__file__) @@ -396,6 +398,29 @@ layout_test_cases = [ ] +def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4): + # Quick and dirty TIFF header builder from + # https://stackoverflow.com/questions/2641770 + tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h' + return struct.pack( + tiff_header_struct, + b'II', # Byte order indication: Little indian + 42, # Version number (always 42) + 8, # Offset to first IFD + 8, # Number of tags in IFD + 256, 4, 1, width, # ImageWidth, LONG, 1, width + 257, 4, 1, height, # ImageLength, LONG, 1, lenght + 258, 3, 1, 1, # BitsPerSample, SHORT, 1, 1 + 259, 3, 1, ccitt_group, # Compression, SHORT, 1, 4 = CCITT Group 4 + 262, 3, 1, 1, # Threshholding, SHORT, 1, 0 = WhiteIsZero + 273, 4, 1, struct.calcsize( + tiff_header_struct), # StripOffsets, LONG, 1, len of header + 278, 4, 1, height, # RowsPerStrip, LONG, 1, lenght + 279, 4, 1, img_size, # StripByteCounts, LONG, 1, size of image + 0 # last IFD + ) + + def test_suite(): class TestImg2Pdf(unittest.TestCase): pass @@ -485,7 +510,8 @@ def test_suite(): # test if the filter is valid: self.assertIn( imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode], - [PdfName.FlateDecode]]) + [PdfName.FlateDecode], + [PdfName.CCITTFaxDecode]]) # test if the colorspace is valid self.assertIn( imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB, @@ -500,6 +526,22 @@ def test_suite(): self.assertEqual( x.Root.Pages.Kids[0].Resources.XObject.Im0.stream, convert_load(orig_imgdata)) + elif imgprops.Filter == [PdfName.CCITTFaxDecode]: + tiff_header = tiff_header_for_ccitt( + int(imgprops.Width), int(imgprops.Height), + int(imgprops.Length), 4) + imgio = BytesIO() + imgio.write(tiff_header) + imgio.write(convert_store( + x.Root.Pages.Kids[0].Resources.XObject.Im0.stream)) + imgio.seek(0) + im = Image.open(imgio) + self.assertEqual(im.tobytes(), orig_img.tobytes()) + try: + im.close() + except AttributeError: + pass + elif imgprops.Filter == [PdfName.FlateDecode]: # otherwise, the data is flate encoded and has to be equal to # the pixel data of the input image diff --git a/src/tests/input/mono.png b/src/tests/input/mono.png Binary files differnew file mode 100644 index 0000000..59b17ad --- /dev/null +++ b/src/tests/input/mono.png diff --git a/src/tests/output/mono.png.pdf b/src/tests/output/mono.png.pdf Binary files differnew file mode 100644 index 0000000..eda3ec7 --- /dev/null +++ b/src/tests/output/mono.png.pdf |