summaryrefslogtreecommitdiff
path: root/tests/test_pdfa.py
blob: 5757d30e3c5216c43318c6cd8af4ba0c86c64ff4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pytest
from pikepdf import Pdf
import os
import platform
import shutil
from pathlib import Path
from contextlib import suppress
from subprocess import run, PIPE, STDOUT
import xml.etree.ElementTree as ET


# Look for the veraPDF launcher at $HOME/verapdf/verapdf.  Start from the
# pessimistic answer so that any failure during the lookup (for example HOME
# not being set) simply leaves the validator marked as unavailable.
NO_PDFA_VALIDATOR = True
with suppress(Exception):
    VERAPDF = Path(os.environ['HOME']) / 'verapdf' / 'verapdf'
    NO_PDFA_VALIDATOR = not VERAPDF.is_file()


def verapdf_validate(filename):
    """Run veraPDF on *filename* and report whether validation succeeded.

    The file is fed to the veraPDF executable on standard input and the XML
    report is extracted from the combined stdout/stderr output.

    Args:
        filename: Path of the PDF file to validate.

    Returns:
        True if the report contains a ``taskResult`` element of type
        ``VALIDATE`` whose ``isExecuted`` and ``isSuccess`` attributes are
        both ``'true'``.  False otherwise, including when the output holds
        no XML report or the report has no such element.

    Raises:
        subprocess.CalledProcessError: If veraPDF exits with a non-zero
            status (``check=True``).
        xml.etree.ElementTree.ParseError: If the report is not well-formed.
    """
    with open(filename, 'rb') as f:
        proc = run([VERAPDF], stdin=f, stdout=PIPE, stderr=STDOUT, check=True)
    result = proc.stdout.decode('utf-8')

    # stderr is merged into stdout, so other text may come before the XML
    # prolog; skip ahead to it.  find() returns -1 when there is no prolog,
    # which would otherwise slice off just the last character.
    xml_start = result.find('<?xml version')
    if xml_start < 0:
        return False

    root = ET.fromstring(result[xml_start:])
    node = root.find(".//taskResult[@type='VALIDATE']")
    if node is None:
        # No validation task in the report: treat as "not validated".
        return False
    return node.get('isExecuted') == 'true' and \
            node.get('isSuccess') == 'true'


@pytest.mark.skipif(NO_PDFA_VALIDATOR, reason="can't find verapdf")
def test_pdfa_sanity(resources, outdir):
    """Check that a PDF/A file still validates after being re-saved."""
    source = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
    resaved = outdir / 'pdfa.pdf'

    # The untouched input must pass validation, otherwise the comparison
    # with the re-saved copy below would be meaningless.
    assert verapdf_validate(source)

    pdf = Pdf.open(source)
    pdf.save(resaved)

    # The re-saved copy must validate too, and the metadata must report the
    # expected PDF/A status.
    assert verapdf_validate(resaved)
    assert pdf.open_metadata().pdfa_status == '1B'