summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohannes Schauer <josch@debian.org>2017-01-20 05:49:31 +0100
committerJohannes Schauer <josch@debian.org>2017-01-20 05:49:31 +0100
commitf71d3883871752e9ab72bb175c89a378df2af529 (patch)
tree98fdde17ba8a53ac5d03fe672289b4d3cdeba2cf /src
parentd3481fe48afe150f38f331048abe6452b8389723 (diff)
Import upstream version 0.2.3
Diffstat (limited to 'src')
-rw-r--r--src/img2pdf.egg-info/PKG-INFO6
-rw-r--r--src/img2pdf.egg-info/SOURCES.txt2
-rwxr-xr-xsrc/img2pdf.py173
-rw-r--r--src/jp2.py1
-rw-r--r--src/tests/__init__.py44
-rw-r--r--src/tests/input/mono.pngbin0 -> 444 bytes
-rw-r--r--src/tests/output/mono.png.pdfbin0 -> 915 bytes
7 files changed, 175 insertions, 51 deletions
diff --git a/src/img2pdf.egg-info/PKG-INFO b/src/img2pdf.egg-info/PKG-INFO
index b18e9d6..870fa2d 100644
--- a/src/img2pdf.egg-info/PKG-INFO
+++ b/src/img2pdf.egg-info/PKG-INFO
@@ -1,12 +1,12 @@
Metadata-Version: 1.1
Name: img2pdf
-Version: 0.2.1
+Version: 0.2.3
Summary: Convert images to PDF via direct JPEG inclusion.
Home-page: https://gitlab.mister-muffin.de/josch/img2pdf
Author: Johannes 'josch' Schauer
Author-email: josch@mister-muffin.de
License: LGPL
-Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.1
+Download-URL: https://gitlab.mister-muffin.de/josch/img2pdf/repository/archive.tar.gz?ref=0.2.3
Description: img2pdf
=======
@@ -157,7 +157,7 @@ Classifier: Intended Audience :: Other Audience
Classifier: Environment :: Console
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
Classifier: Natural Language :: English
diff --git a/src/img2pdf.egg-info/SOURCES.txt b/src/img2pdf.egg-info/SOURCES.txt
index 192589d..add31f1 100644
--- a/src/img2pdf.egg-info/SOURCES.txt
+++ b/src/img2pdf.egg-info/SOURCES.txt
@@ -15,9 +15,11 @@ src/img2pdf.egg-info/top_level.txt
src/img2pdf.egg-info/zip-safe
src/tests/__init__.py
src/tests/input/CMYK.jpg
+src/tests/input/mono.png
src/tests/input/normal.jpg
src/tests/input/normal.png
src/tests/output/CMYK.jpg.pdf
src/tests/output/CMYK.tif.pdf
+src/tests/output/mono.png.pdf
src/tests/output/normal.jpg.pdf
src/tests/output/normal.png.pdf \ No newline at end of file
diff --git a/src/img2pdf.py b/src/img2pdf.py
index 2042d13..20fe784 100755
--- a/src/img2pdf.py
+++ b/src/img2pdf.py
@@ -28,7 +28,7 @@ from enum import Enum
from io import BytesIO
import logging
-__version__ = "0.2.1"
+__version__ = "0.2.3"
default_dpi = 96.0
papersizes = {
"letter": "8.5inx11in",
@@ -58,7 +58,7 @@ PageOrientation = Enum('PageOrientation', 'portrait landscape')
Colorspace = Enum('Colorspace', 'RGB L 1 CMYK CMYK;I RGBA P other')
-ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 other')
+ImageFormat = Enum('ImageFormat', 'JPEG JPEG2000 CCITTGroup4 other')
PageMode = Enum('PageMode', 'none outlines thumbs')
@@ -167,6 +167,8 @@ class MyPdfDict(object):
class MyPdfName():
def __getattr__(self, name):
return b'/' + name.encode('ascii')
+
+
MyPdfName = MyPdfName()
@@ -314,7 +316,7 @@ class pdfdoc(object):
self.info[PdfName.Author] = PdfString.encode(author)
if creator is not None:
self.info[PdfName.Creator] = PdfString.encode(creator)
- if producer is not None:
+ if producer is not None and producer != "":
self.info[PdfName.Producer] = PdfString.encode(producer)
if creationdate is not None:
self.info[PdfName.CreationDate] = \
@@ -354,14 +356,15 @@ class pdfdoc(object):
imgwidthpdf, imgheightpdf, imgxpdf, imgypdf, pagewidth,
pageheight):
if self.with_pdfrw:
- from pdfrw import PdfDict, PdfName
+ from pdfrw import PdfDict, PdfName, PdfObject
from pdfrw.py23_diffs import convert_load
else:
PdfDict = MyPdfDict
PdfName = MyPdfName
+ PdfObject = MyPdfObject
convert_load = my_convert_load
- if color == Colorspace.L:
+ if color == Colorspace['1'] or color == Colorspace.L:
colorspace = PdfName.DeviceGray
elif color == Colorspace.RGB:
colorspace = PdfName.DeviceRGB
@@ -372,11 +375,14 @@ class pdfdoc(object):
% color.name)
# either embed the whole jpeg or deflate the bitmap representation
+ logging.debug(imgformat)
if imgformat is ImageFormat.JPEG:
ofilter = [PdfName.DCTDecode]
elif imgformat is ImageFormat.JPEG2000:
ofilter = [PdfName.JPXDecode]
self.writer.version = "1.5" # jpeg2000 needs pdf 1.5
+ elif imgformat is ImageFormat.CCITTGroup4:
+ ofilter = [PdfName.CCITTFaxDecode]
else:
ofilter = [PdfName.FlateDecode]
@@ -389,12 +395,23 @@ class pdfdoc(object):
image[PdfName.Height] = imgheightpx
image[PdfName.ColorSpace] = colorspace
# hardcoded as PIL doesn't provide bits for non-jpeg formats
- image[PdfName.BitsPerComponent] = 8
+ if imgformat is ImageFormat.CCITTGroup4:
+ image[PdfName.BitsPerComponent] = 1
+ else:
+ image[PdfName.BitsPerComponent] = 8
if color == Colorspace['CMYK;I']:
# Inverts all four channels
image[PdfName.Decode] = [1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
+ if imgformat is ImageFormat.CCITTGroup4:
+ decodeparms = PdfDict()
+ decodeparms[PdfName.K] = -1
+ decodeparms[PdfName.BlackIs1] = PdfObject('true')
+ decodeparms[PdfName.Columns] = imgwidthpx
+ decodeparms[PdfName.Rows] = imgheightpx
+ image[PdfName.DecodeParms] = [decodeparms]
+
text = ("q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" %
(imgwidthpdf, imgheightpdf, imgxpdf, imgypdf)).encode("ascii")
@@ -594,6 +611,45 @@ def get_imgmetadata(imgdata, imgformat, default_dpi, colorspace, rawdata=None):
return (color, ndpi, imgwidthpx, imgheightpx)
+def transcode_monochrome(imgdata):
+ """Convert the open PIL.Image imgdata to compressed CCITT Group4 data"""
+
+ from PIL import TiffImagePlugin
+
+ logging.debug("Converting monochrome to CCITT Group4")
+
+ # Convert the image to Group 4 in memory. If libtiff is not installed and
+ # Pillow is not compiled against it, .save() will raise an exception.
+ newimgio = BytesIO()
+ imgdata.save(newimgio, format='TIFF', compression='group4')
+
+ # Open new image in memory
+ newimgio.seek(0)
+ newimg = Image.open(newimgio)
+
+ # If Pillow is passed an invalid compression argument it will ignore it;
+ # make sure the image actually got compressed.
+ if newimg.info['compression'] != 'group4':
+ raise ValueError("Image not compressed as expected")
+
+ # Read the TIFF tags to find the offset(s) of the compressed data strips.
+ strip_offsets = newimg.tag_v2[TiffImagePlugin.STRIPOFFSETS]
+ strip_bytes = newimg.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]
+ rows_per_strip = newimg.tag_v2[TiffImagePlugin.ROWSPERSTRIP]
+
+ # PIL always seems to create a single strip even for very large TIFFs when
+ # it saves images, so assume we only have to read a single strip.
+ # A test ~10 GPixel image was still encoded as a single strip. Just to be
+ # safe, raise an error if there is more than one offset.
+ if len(strip_offsets) > 1:
+ raise NotImplementedError("Transcoding multiple strips not supported")
+
+ newimgio.seek(strip_offsets[0])
+ ccittdata = newimgio.read(strip_bytes[0])
+
+ return ccittdata
+
+
def read_images(rawdata, colorspace, first_frame_only=False):
im = BytesIO(rawdata)
im.seek(0)
@@ -648,11 +704,20 @@ def read_images(rawdata, colorspace, first_frame_only=False):
color, ndpi, imgwidthpx, imgheightpx = get_imgmetadata(
imgdata, imgformat, default_dpi, colorspace)
- # because we do not support /CCITTFaxDecode
+ newimg = None
if color == Colorspace['1']:
- logging.debug("Converting colorspace 1 to L")
- newimg = imgdata.convert('L')
- color = Colorspace.L
+ try:
+ ccittdata = transcode_monochrome(imgdata)
+ imgformat = ImageFormat.CCITTGroup4
+ result.append((color, ndpi, imgformat, ccittdata,
+ imgwidthpx, imgheightpx))
+ img_page_count += 1
+ continue
+ except Exception as e:
+ logging.debug(e)
+ logging.debug("Converting colorspace 1 to L")
+ newimg = imgdata.convert('L')
+ color = Colorspace.L
elif color in [Colorspace.RGB, Colorspace.L, Colorspace.CMYK,
Colorspace["CMYK;I"]]:
logging.debug("Colorspace is OK: %s", color)
@@ -927,12 +992,22 @@ def convert(*images, title=None,
viewer_fit_window, viewer_center_window, viewer_fullscreen,
with_pdfrw)
+ # backwards compatibility with older img2pdf versions where the first
+ # argument to the function had to be given as a list
+ if len(images) == 1:
+ # if only one argument was given and it is a list, expand it
+ if isinstance(images[0], (list, tuple)):
+ images = images[0]
+
for img in images:
# img is allowed to be a path, a binary string representing image data
# or a file-like object (really anything that implements read())
try:
rawdata = img.read()
except AttributeError:
+ if not isinstance(img, (str, bytes)):
+ raise TypeError(
+ "Neither implements read() nor is str or bytes")
# the thing doesn't have a read() function, so try if we can treat
# it as a file name
try:
@@ -1256,10 +1331,11 @@ useful to convert JPEG and JPEG2000 images to PDF.
The output is sent to standard output so that it can be redirected into a file
or to another program as part of a shell pipe. To directly write the output
into a file, use the -o or --output option.
+
+Options:
''',
epilog='''\
-Colorspace
-
+Colorspace:
Currently, the colorspace must be forced for JPEG 2000 images that are not in
the RGB colorspace. Available colorspace options are based on Python Imaging
Library (PIL) short handles.
@@ -1270,8 +1346,7 @@ Colorspace
CMYK CMYK color
CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe)
-Paper sizes
-
+Paper sizes:
You can specify the short hand paper size names shown in the first column in
the table below as arguments to the --pagesize and --imgsize options. The
width and height they are mapping to is shown in the second column. Giving
@@ -1282,8 +1357,7 @@ Paper sizes
%s
-Fit options
-
+Fit options:
The img2pdf options for the --fit argument are shown in the first column in
the table below. The function of these options can be mapped to the geometry
operators of imagemagick. For users who are familiar with imagemagick, the
@@ -1307,8 +1381,32 @@ Fit options
enlarge | < | Y | Enlarges an image with dimensions smaller than the given
| | | ones (and otherwise behaves like "into").
-Examples
+Argument parsing:
+ Argument long options can be abbreviated to a prefix if the abbreviation is
+ unambiguous. That is, the prefix must match a unique option.
+
+ Beware of your shell interpreting argument values as special characters (like
+ the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
+ values in single quotes.
+
+ If you want an argument value to start with one or more minus characters, you
+ must use the long option name and join them with an equal sign like so:
+
+ $ img2pdf --author=--test--
+
+ If your input file name starts with one or more minus characters, either
+ separate the input files from the other arguments by two minus signs:
+ $ img2pdf -- --my-file-starts-with-two-minuses.jpg
+
+ Or be more explicit about its relative path by prepending a ./:
+
+ $ img2pdf ./--my-file-starts-with-two-minuses.jpg
+
+ The order of non-positional arguments (all arguments other than the input
+ images) does not matter.
+
+Examples:
Lines starting with a dollar sign denote commands you can enter into your
terminal. The dollar sign signifies your command prompt. It is not part of
the command you type.
@@ -1340,31 +1438,9 @@ Examples
$ img2pdf --output out.pdf --colorspace L input.jp2
-Argument parsing
-
- Argument long options can be abbreviated to a prefix if the abbreviation is
- anambiguous. That is, the prefix must match a unique option.
-
- Beware of your shell interpreting argument values as special characters (like
- the semicolon in the CMYK;I colorspace option). If in doubt, put the argument
- values in single quotes.
-
- If you want an argument value to start with one or more minus characters, you
- must use the long option name and join them with an equal sign like so:
-
- $ img2pdf --author=--test--
-
- If your input file name starts with one or more minus characters, either
- separate the input files from the other arguments by two minus signs:
-
- $ img2pdf -- --my-file-starts-with-two-minuses.jpg
+Written by Johannes 'josch' Schauer <josch@mister-muffin.de>
- Or be more explicit about its relative path by prepending a ./:
-
- $ img2pdf ./--my-file-starts-with-two-minuses.jpg
-
- The order of non-positional arguments (all arguments other than the input
- images) does not matter.
+Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues
''' % rendered_papersizes)
parser.add_argument(
@@ -1385,7 +1461,7 @@ Argument parsing
outargs = parser.add_argument_group(
title='General output arguments',
- description='')
+ description='Arguments controlling the output format.')
outargs.add_argument(
'-o', '--output', metavar='out', type=argparse.FileType('wb'),
@@ -1428,8 +1504,7 @@ RGB.''')
sizeargs = parser.add_argument_group(
title='Image and page size and layout arguments',
description='''\
-
-Every input image will be placed on its own page. The image size is controlled
+Every input image will be placed on its own page. The image size is controlled
by the dpi value of the input image or, if unset or missing, the default dpi of
%.2f. By default, each page will have the same size as the image it shows.
Thus, there will be no visible border between the image and the page border by
@@ -1518,8 +1593,10 @@ of the input image. If the orientation of a page gets flipped, then so do the
values set via the --border option.
''')
- metaargs = parser.add_argument_group(title='Arguments setting metadata',
- description='')
+ metaargs = parser.add_argument_group(
+ title='Arguments setting metadata',
+ description='Options handling embedded timestamps, title and author '
+ 'information.')
metaargs.add_argument(
'--title', metavar='title', type=str,
help='Sets the title metadata value')
@@ -1532,7 +1609,8 @@ values set via the --border option.
metaargs.add_argument(
'--producer', metavar='producer', type=str,
default="img2pdf " + __version__,
- help='Sets the producer metadata value (default is: img2pdf)')
+ help='Sets the producer metadata value '
+ '(default is: img2pdf ' + __version__ + ')')
metaargs.add_argument(
'--creationdate', metavar='creationdate', type=valid_date,
help='Sets the UTC creation date metadata value in YYYY-MM-DD or '
@@ -1646,5 +1724,6 @@ values set via the --border option.
traceback.print_exc(file=sys.stderr)
exit(1)
+
if __name__ == '__main__':
main()
diff --git a/src/jp2.py b/src/jp2.py
index 7f61312..30edb7e 100644
--- a/src/jp2.py
+++ b/src/jp2.py
@@ -116,6 +116,7 @@ def parsejp2(data):
# retrieving the dpi is optional so we do not error out if not present
return (width, height, colorspace, hdpi, vdpi)
+
if __name__ == "__main__":
import sys
width, height, colorspace = parsejp2(open(sys.argv[1]).read())
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
index b668054..506fc48 100644
--- a/src/tests/__init__.py
+++ b/src/tests/__init__.py
@@ -4,6 +4,8 @@ import os
import img2pdf
import zlib
from PIL import Image
+from io import BytesIO
+import struct
HERE = os.path.dirname(__file__)
@@ -396,6 +398,29 @@ layout_test_cases = [
]
+def tiff_header_for_ccitt(width, height, img_size, ccitt_group=4):
+ # Quick and dirty TIFF header builder from
+ # https://stackoverflow.com/questions/2641770
+ tiff_header_struct = '<' + '2s' + 'h' + 'l' + 'h' + 'hhll' * 8 + 'h'
+ return struct.pack(
+ tiff_header_struct,
+ b'II', # Byte order indication: Little endian
+ 42, # Version number (always 42)
+ 8, # Offset to first IFD
+ 8, # Number of tags in IFD
+ 256, 4, 1, width, # ImageWidth, LONG, 1, width
+ 257, 4, 1, height, # ImageLength, LONG, 1, length
+ 258, 3, 1, 1, # BitsPerSample, SHORT, 1, 1
+ 259, 3, 1, ccitt_group, # Compression, SHORT, 1, 4 = CCITT Group 4
+ 262, 3, 1, 1, # PhotometricInterpretation, SHORT, 1, 1 = BlackIsZero
+ 273, 4, 1, struct.calcsize(
+ tiff_header_struct), # StripOffsets, LONG, 1, len of header
+ 278, 4, 1, height, # RowsPerStrip, LONG, 1, length
+ 279, 4, 1, img_size, # StripByteCounts, LONG, 1, size of image
+ 0 # last IFD
+ )
+
+
def test_suite():
class TestImg2Pdf(unittest.TestCase):
pass
@@ -485,7 +510,8 @@ def test_suite():
# test if the filter is valid:
self.assertIn(
imgprops.Filter, [[PdfName.DCTDecode], [PdfName.JPXDecode],
- [PdfName.FlateDecode]])
+ [PdfName.FlateDecode],
+ [PdfName.CCITTFaxDecode]])
# test if the colorspace is valid
self.assertIn(
imgprops.ColorSpace, [PdfName.DeviceGray, PdfName.DeviceRGB,
@@ -500,6 +526,22 @@ def test_suite():
self.assertEqual(
x.Root.Pages.Kids[0].Resources.XObject.Im0.stream,
convert_load(orig_imgdata))
+ elif imgprops.Filter == [PdfName.CCITTFaxDecode]:
+ tiff_header = tiff_header_for_ccitt(
+ int(imgprops.Width), int(imgprops.Height),
+ int(imgprops.Length), 4)
+ imgio = BytesIO()
+ imgio.write(tiff_header)
+ imgio.write(convert_store(
+ x.Root.Pages.Kids[0].Resources.XObject.Im0.stream))
+ imgio.seek(0)
+ im = Image.open(imgio)
+ self.assertEqual(im.tobytes(), orig_img.tobytes())
+ try:
+ im.close()
+ except AttributeError:
+ pass
+
elif imgprops.Filter == [PdfName.FlateDecode]:
# otherwise, the data is flate encoded and has to be equal to
# the pixel data of the input image
diff --git a/src/tests/input/mono.png b/src/tests/input/mono.png
new file mode 100644
index 0000000..59b17ad
--- /dev/null
+++ b/src/tests/input/mono.png
Binary files differ
diff --git a/src/tests/output/mono.png.pdf b/src/tests/output/mono.png.pdf
new file mode 100644
index 0000000..eda3ec7
--- /dev/null
+++ b/src/tests/output/mono.png.pdf
Binary files differ