summaryrefslogtreecommitdiff
path: root/tests/test_roundtrip.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test_roundtrip.py')
-rwxr-xr-xtests/test_roundtrip.py132
1 files changed, 132 insertions, 0 deletions
diff --git a/tests/test_roundtrip.py b/tests/test_roundtrip.py
new file mode 100755
index 0000000..cb3645e
--- /dev/null
+++ b/tests/test_roundtrip.py
@@ -0,0 +1,132 @@
+#! /usr/bin/env python
+
+# A part of pdfrw (https://github.com/pmaupin/pdfrw)
+# Copyright (C) 2015 Patrick Maupin, Austin, Texas
+# MIT license -- See LICENSE.txt for details
+
+'''
+Run from the directory above like so:
+
+ python -m tests.test_roundtrip
+
+A PDF that has been determined to be good or bad
+should be added to expected.txt with either a good
+checksum, or just the word "fail".
+
+These tests are incomplete, but they allow us to try
+out various PDFs. There is a collection of difficult
+PDFs available on github.
+
+In order to use them:
+
+ 1) Ensure that github.com/pmaupin/static_pdfs is on your path.
+
+ 2) Use the imagemagick compare program to look at differences
+ between the static_pdfs/global directory and the tmp_results
+ directory after you run this.
+
+
+'''
+import os
+import hashlib
+import pdfrw
+import static_pdfs
+import expected
+
+from pdfrw.py23_diffs import convert_store
+
+try:
+ import unittest2 as unittest
+except ImportError:
+ import unittest
+
+
+class TestOnePdf(unittest.TestCase):
+    '''
+    Round-trip tests: read a PDF with pdfrw, write it back out, and
+    compare the MD5 checksum of the written file against the entry
+    recorded in expected.results.
+
+    The individual test_* methods are attached to this class at
+    import time by build_tests().
+    '''
+
+    def roundtrip(self, testname, basename, srcf, decompress=False,
+                  compress=False, repaginate=False):
+        '''
+        Read srcf, write it to <result_dir>/<testname>/<basename>,
+        and check the checksum of what was written.
+
+        testname   -- test group; names the output subdirectory and
+                      forms the first half of the expected.results key
+        basename   -- file name of the PDF; second half of the key
+        srcf       -- path of the source PDF given to pdfrw.PdfReader
+        decompress -- passed through to pdfrw.PdfReader
+        compress   -- passed through to pdfrw.PdfWriter
+        repaginate -- if true, the pages are copied into the writer
+                      with addpages() and the file is written without
+                      the original trailer
+
+        The test is skipped when the expected entry contains 'skip',
+        when the source is encrypted, or when no expected checksum
+        is recorded.  It fails when the entry contains 'xfail' or
+        the checksum does not match.  Whatever happens, one line
+        (size, outcome, key, checksum) is appended to hashes.txt
+        in the result directory.
+        '''
+        dstd = os.path.join(expected.result_dir, testname)
+        if not os.path.exists(dstd):
+            os.makedirs(dstd)
+        dstf = os.path.join(dstd, basename)
+        hashkey = '%s/%s' % (testname, basename)
+        hashfile = os.path.join(expected.result_dir, 'hashes.txt')
+        expects = expected.results[hashkey]
+
+        # Defaults that get logged if we bail out before producing
+        # (or reading) an output file.
+        digest = '------no-file-generated---------'
+        outcome = 'fail'
+        nbytes = 0
+
+        # A deliberately skipped test stops right away.  Anything
+        # else is executed even when nothing is known about it yet,
+        # so that there are results to compare later.
+        try:
+            if 'skip' in expects:
+                outcome = 'skip requested'
+                return self.skipTest(outcome)
+            if 'xfail' in expects:
+                outcome = 'xfail requested'
+                return self.fail(outcome)
+
+            have_output = os.path.exists(dstf)
+            # An output left over from an earlier run is reused only
+            # when there is nothing to compare it against; otherwise
+            # it is regenerated from scratch.
+            reuse_old = have_output and not expects
+            if not reuse_old:
+                if have_output:
+                    os.remove(dstf)
+                reader = pdfrw.PdfReader(srcf, decompress=decompress,
+                                         verbose=False)
+                if reader.Encrypt:
+                    outcome = 'skip -- encrypt'
+                    digest = '------skip-encrypt-no-file------'
+                    return self.skipTest('File encrypted')
+                writer = pdfrw.PdfWriter(compress=compress)
+                if repaginate:
+                    # Hand over the pages only; the write below then
+                    # gets no trailer.
+                    writer.addpages(reader.pages)
+                    reader = None
+                writer.write(dstf, reader)
+
+            with open(dstf, 'rb') as stream:
+                content = stream.read()
+            nbytes = len(content)
+            if not content:
+                # Do not leave an empty output file behind.
+                os.remove(dstf)
+            else:
+                digest = hashlib.md5(content).hexdigest()
+
+            if not expects:
+                outcome = 'skip'
+                return self.skipTest('No hash available')
+            if len(expects) == 1:
+                # assertEqual gives a clearer message for one value.
+                (only,) = expects
+                self.assertEqual(digest, only)
+            else:
+                self.assertIn(digest, expects)
+            outcome = 'pass'
+        finally:
+            # Runs on pass, fail, skip and error alike.
+            line = '%8d %-20s %s %s\n' % (nbytes, outcome, hashkey, digest)
+            with open(hashfile, 'ab') as log:
+                log.write(convert_store(line))
+
+
+def build_tests():
+    '''
+    Attach the generated test methods to TestOnePdf.
+
+    For each variant ('simple', and 'repaginate' with
+    repaginate=True) and each source path in
+    static_pdfs.pdffiles[0], a method named
+    test_<variant>_<basename> is set on the class.  Each method
+    simply calls self.roundtrip() for that variant and file.
+    '''
+    def make_test(group, pdfname, path, repaginate):
+        # Factory function: the arguments are bound per call, so
+        # every generated method keeps its own values rather than
+        # whatever the loop variables hold at the end.
+        def test(self):
+            self.roundtrip(group, pdfname, path, repaginate=repaginate)
+        return test
+
+    variants = (
+        ('simple', False),
+        ('repaginate', True),
+    )
+    for group, repaginate in variants:
+        for path in static_pdfs.pdffiles[0]:
+            pdfname = os.path.basename(path)
+            method_name = 'test_%s_%s' % (group, pdfname)
+            setattr(TestOnePdf, method_name,
+                    make_test(group, pdfname, path, repaginate))
+
+
+build_tests()
+
+
+def main():
+ '''Hand control to unittest's command-line test runner.'''
+ unittest.main()
+
+# Only run the tests when this module is executed as a script.
+if __name__ == '__main__':
+ main()