mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-10-01 19:17:02 +00:00

Acrobat insists that PDF/A-1b should not have object streams. Other programs like veraPDF disagree with this restriction, but we can accommodate Acrobat so we will. Also add more tests around this.
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
# © 2021 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
import pikepdf
|
|
import pytest
|
|
|
|
# Module-level shorthand for the check_ocrmypdf helper exposed on
# pytest.helpers (presumably registered in the project's conftest -- confirm).
check_ocrmypdf = pytest.helpers.check_ocrmypdf
|
|
|
|
|
|
@pytest.mark.parametrize('optimize', (0, 3))
@pytest.mark.parametrize('pdfa_level', (1, 2, 3))
def test_pdfa(resources, outpdf, optimize, pdfa_level):
    """Produce PDF/A output at each level and check its structure.

    Runs ``check_ocrmypdf`` on ``francais.pdf`` with the requested
    ``--output-type`` and ``--optimize`` settings, then verifies two things:

    * whether the raw output bytes contain ``/ObjStm`` (expected for
      levels 2 and 3, forbidden for level 1), and
    * that the PDF metadata reports a ``pdfa_status`` of ``'<level>B'``.
    """
    source_pdf = resources / 'francais.pdf'
    cli_args = [
        '--plugin',
        'tests/plugins/tesseract_noop.py',
        f'--output-type=pdfa-{pdfa_level}',
        f'--optimize={optimize}',
    ]
    check_ocrmypdf(source_pdf, outpdf, *cli_args)

    if pdfa_level == 1:
        # PDF/A-1 might allow ObjStm, but Acrobat does not approve it, so
        # the output is expected to contain none.
        assert b'/ObjStm' not in outpdf.read_bytes()
    elif pdfa_level in (2, 3):
        # PDF/A-2 allows ObjStm
        assert b'/ObjStm' in outpdf.read_bytes()

    expected_status = f'{pdfa_level}B'
    with pikepdf.open(outpdf) as document, document.open_metadata() as metadata:
        assert metadata.pdfa_status == expected_status
|