diff options
Diffstat (limited to 'tests/test_roundtrip.py')
-rwxr-xr-x | tests/test_roundtrip.py | 132 |
1 files changed, 132 insertions, 0 deletions
#! /usr/bin/env python

# A part of pdfrw (https://github.com/pmaupin/pdfrw)
# Copyright (C) 2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

'''
Run from the directory above like so:

    python -m tests.test_roundtrip

A PDF that has been determined to be good or bad
should be added to expected.txt with either a good
checksum, or just the word "fail".

These tests are incomplete, but they allow us to try
out various PDFs.  There is a collection of difficult
PDFs available on github.

In order to use them:

  1) Ensure that github.com/pmaupin/static_pdfs is on your path.

  2) Use the imagemagick compare program to look at differences
     between the static_pdfs/global directory and the tmp_results
     directory after you run this.

'''
import os
import hashlib
import pdfrw
import static_pdfs
import expected

from pdfrw.py23_diffs import convert_store

try:
    import unittest2 as unittest
except ImportError:
    import unittest


class TestOnePdf(unittest.TestCase):
    '''
    Holder for the generated round-trip tests.

    The individual test_* methods are attached to this class
    by build_tests(); each one simply calls roundtrip().
    '''

    def roundtrip(self, testname, basename, srcf, decompress=False,
                  compress=False, repaginate=False):
        '''
        Read one PDF with pdfrw, write it back out, and check the
        MD5 of the written file against expected.results.

        testname   -- name of the test variant; also the name of the
                      output subdirectory under expected.result_dir
        basename   -- file name used for the output PDF
        srcf       -- path of the source PDF handed to PdfReader
        decompress -- passed through to pdfrw.PdfReader
        compress   -- passed through to pdfrw.PdfWriter
        repaginate -- if true, the pages are added to the writer
                      one by one and no trailer is passed to write()

        Whatever the outcome (pass, fail, skip or exception), one
        line with size, status, key and hash is appended to
        hashes.txt in expected.result_dir.
        '''
        out_dir = os.path.join(expected.result_dir, testname)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        out_path = os.path.join(out_dir, basename)
        log_path = os.path.join(expected.result_dir, 'hashes.txt')
        key = '%s/%s' % (testname, basename)
        digest = '------no-file-generated---------'
        wanted = expected.results[key]

        # A test that is explicitly marked is finished right away.
        # Anything else is executed, even when no expectation has
        # been recorded yet, so that there is output to look at.

        status = 'fail'
        nbytes = 0
        try:
            if 'skip' in wanted:
                status = 'skip requested'
                return self.skipTest(status)
            if 'xfail' in wanted:
                status = 'xfail requested'
                return self.fail(status)

            already_there = os.path.exists(out_path)
            if wanted or not already_there:
                # Regenerate the output; a stale copy is removed first.
                if already_there:
                    os.remove(out_path)
                trailer = pdfrw.PdfReader(srcf, decompress=decompress,
                                          verbose=False)
                if trailer.Encrypt:
                    status = 'skip -- encrypt'
                    digest = '------skip-encrypt-no-file------'
                    return self.skipTest('File encrypted')
                writer = pdfrw.PdfWriter(compress=compress)
                if repaginate:
                    writer.addpages(trailer.pages)
                    trailer = None
                writer.write(out_path, trailer)

            with open(out_path, 'rb') as stream:
                content = stream.read()
            nbytes = len(content)
            if content:
                digest = hashlib.md5(content).hexdigest()
            else:
                # Do not leave an empty output file lying around.
                os.remove(out_path)

            if not wanted:
                status = 'skip'
                self.skipTest('No hash available')
            else:
                if len(wanted) == 1:
                    only, = wanted
                    self.assertEqual(digest, only)
                else:
                    self.assertIn(digest, wanted)
                status = 'pass'
        finally:
            # Runs on every exit path above, including skips and
            # assertion failures.
            record = '%8d %-20s %s %s\n' % (nbytes, status, key, digest)
            with open(log_path, 'ab') as log:
                log.write(convert_store(record))


def build_tests():
    '''
    Attach one test method to TestOnePdf for every file in
    static_pdfs.pdffiles[0], once per variant ('simple' and
    'repaginate').
    '''
    def bind(*args, **kw):
        # Freeze the arguments for this particular test method.
        def test(self):
            self.roundtrip(*args, **kw)
        return test

    variants = (
        ('simple', False),
        ('repaginate', True),
    )
    for variant, repaginate in variants:
        for srcf in static_pdfs.pdffiles[0]:
            pdf_name = os.path.basename(srcf)
            method = bind(variant, pdf_name, srcf, repaginate=repaginate)
            setattr(TestOnePdf, 'test_%s_%s' % (variant, pdf_name), method)
build_tests()


def main():
    '''Run the generated tests through unittest.'''
    unittest.main()

if __name__ == '__main__':
    main()