Improve encryption tests; drop some public domain resources

Generate the encrypted files we need and remove special test files we retained for this.

Replace jbig2.pdf based on congress.jpg with version based on ccitt.pdf.
This commit is contained in:
James R. Barlow 2022-08-06 14:35:18 -07:00
parent 7d23a661fc
commit 79db985181
No known key found for this signature in database
GPG Key ID: E54A300D567E1260
6 changed files with 19 additions and 15 deletions

View File

@@ -3,6 +3,7 @@ Upstream-Name: OCRmyPDF
Upstream-Contact: James R. Barlow <james@purplerock.ca>
Source: https://github.com/ocrmypdf/OCRmyPDF
Files:
.git_archival.txt
docs/images/logo-social.png
@@ -48,8 +49,6 @@ Files: tests/resources/c02-22.pdf
tests/resources/congress.jpg
tests/resources/multipage.pdf
tests/resources/palette.pdf
tests/resources/jbig2.pdf
tests/resources/encrypted_algo4.pdf
Copyright: Public domain
License: public-domain
Copyright on these files has expired.
@@ -67,9 +66,9 @@ Files: tests/resources/linn.png
tests/resources/linn.txt
tests/resources/ccitt.pdf
tests/resources/cardinal.pdf
tests/resources/jbig2.pdf
tests/resources/skew.pdf
tests/resources/rotated_skew.pdf
tests/resources/skew-encrypted.pdf
tests/resources/poster.pdf
Copyright: (C) 1985 Forat Electronics
License: GFDL-1.2-or-later or CC-BY-SA-3.0

View File

@@ -111,14 +111,12 @@ These test resources are assemblies or derivatives from other previously mention
- baiona_alpha.png (from baiona.png, RGB+A version)
- cardinal.pdf (four cardinal directions, baked-in rotated copies of linn.png)
- ccitt.pdf (linn.png, converted to CCITT encoding)
- encrypted_algo4.pdf (congress.jpg, encrypted with algorithm 4 - not supported by PyPDF2)
- graph_ocred.pdf (from graph.pdf)
- jbig2.pdf (congress.jpg, converted to JBIG2 encoding)
- multipage.pdf (from several other files)
- palette.pdf (congress.jpg, converted to a 256-color palette)
- poster.pdf (from linn.png)
- rotated_skew.pdf (a /Rotate'd and skewed document from linn.png)
- skew-encrypted.pdf (skew.pdf with encryption - access supported by PyPDF2, password is "password")
- skew.pdf (from linn.png, skew simulated by adjusting the transformation matrix)
- toc.pdf (from formxobject.pdf, trivial.pdf)

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -292,12 +292,6 @@ def test_input_file_not_a_pdf(caplog, no_outpdf):
assert input_file in caplog.text
def test_encrypted(resources, caplog, no_outpdf):
result = run_ocrmypdf_api(resources / 'skew-encrypted.pdf', no_outpdf)
assert result == ExitCode.encrypted_pdf
assert 'encryption must be removed' in caplog.text
@pytest.mark.parametrize('renderer', RENDERERS)
def test_pagesegmode(renderer, resources, outpdf):
check_ocrmypdf(
@@ -398,14 +392,27 @@ def test_tesseract_image_too_big(renderer, resources, outpdf):
)
def test_algo4(resources, outpdf):
p = run_ocrmypdf(
resources / 'encrypted_algo4.pdf',
@pytest.mark.parametrize('encryption_level', [2, 3, 4, 6])
def test_encrypted(resources, outpdf, encryption_level, caplog):
encryption = pikepdf.models.encryption.Encryption(
owner='ocrmypdf',
user='ocrmypdf',
R=encryption_level,
aes=(encryption_level >= 4),
metadata=(encryption_level == 6),
)
with pikepdf.open(resources / 'jbig2.pdf') as pdf:
pdf.save(outpdf, encryption=encryption)
exitcode = run_ocrmypdf_api(
outpdf,
outpdf,
'--plugin',
'tests/plugins/tesseract_noop.py',
)
assert p.returncode == ExitCode.encrypted_pdf
assert exitcode == ExitCode.encrypted_pdf
assert 'encryption must be removed' in caplog.text
def test_jbig2_passthrough(resources, outpdf):