# © 2015-17 James R. Barlow: github.com/jbarlow83
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
2018-03-24 15:21:44 -07:00
from os import fspath
from unittest.mock import patch
2018-12-30 01:28:15 -08:00
2018-03-24 15:21:44 -07:00
import pytest
from ocrmypdf._plugin_manager import get_parser_options_plugins
from ocrmypdf._validation import check_options
from ocrmypdf.exceptions import ExitCode, MissingDependencyError
2018-03-24 15:21:44 -07:00
2021-04-07 01:56:51 -07:00
from .conftest import check_ocrmypdf, have_unpaper, run_ocrmypdf
2018-03-24 15:21:44 -07:00
2021-04-07 01:56:51 -07:00
# pylint: disable=redefined-outer-name
2018-03-24 15:21:44 -07:00
def test_no_unpaper(resources, no_outpdf):
    """Check that ``--clean`` fails option validation when unpaper cannot be found.

    ``ocrmypdf._exec.unpaper.version`` is patched to raise
    ``FileNotFoundError``; ``check_options`` is then expected to raise
    ``MissingDependencyError``, and the patched version probe must have been
    called.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, pm = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        side_effect=FileNotFoundError("unpaper"),
    ) as version_probe:
        with pytest.raises(MissingDependencyError):
            check_options(options, pm)
        # Kept outside the ``raises`` block so it actually executes.
        version_probe.assert_called()
2018-03-24 15:21:44 -07:00
2020-06-02 02:42:14 -07:00
def test_old_unpaper(resources, no_outpdf):
    """Check that ``--clean`` fails option validation when unpaper reports '0.5'.

    ``ocrmypdf._exec.unpaper.version`` is patched to return ``'0.5'``
    (presumably below the minimum supported version -- confirm against the
    validation code); ``check_options`` is then expected to raise
    ``MissingDependencyError``, and the patched version probe must have been
    called.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, pm = get_parser_options_plugins(argv)

    with patch(
        "ocrmypdf._exec.unpaper.version",
        return_value='0.5',
    ) as version_probe:
        with pytest.raises(MissingDependencyError):
            check_options(options, pm)
        # Kept outside the ``raises`` block so it actually executes.
        version_probe.assert_called()
2018-03-24 15:21:44 -07:00
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_clean(resources, outpdf):
    """Run ocrmypdf on ``skew.pdf`` with ``-c`` and the ``tesseract_noop.py`` plugin.

    Skipped when ``have_unpaper()`` is false. Success is judged entirely by
    the ``check_ocrmypdf`` helper.
    """
    cli_flags = ["-c", "--plugin", "tests/plugins/tesseract_noop.py"]
    check_ocrmypdf(resources / "skew.pdf", outpdf, *cli_flags)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_valid(resources, outpdf):
    """Run ocrmypdf with ``-c`` and an ``--unpaper-args`` value of ``--layout double``.

    Skipped when ``have_unpaper()`` is false. Success is judged entirely by
    the ``check_ocrmypdf`` helper.
    """
    # Spaces required here: the whole unpaper argument string is passed as
    # one command-line token.
    unpaper_arg_string = "--layout double"
    cli_flags = [
        "-c",
        "--unpaper-args",
        unpaper_arg_string,
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    check_ocrmypdf(resources / "skew.pdf", outpdf, *cli_flags)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid_filename(resources, outpdf):
    """Check that a path passed via ``--unpaper-args`` is rejected as bad arguments.

    Runs ocrmypdf with ``--unpaper-args /etc/passwd`` and expects the error
    output to contain "No filenames allowed" and the return code to equal
    ``ExitCode.bad_args``. Skipped when ``have_unpaper()`` is false.
    """
    cli_flags = [
        "-c",
        "--unpaper-args",
        "/etc/passwd",
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    proc, _out, err_text = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_flags)

    assert "No filenames allowed" in err_text
    assert proc.returncode == ExitCode.bad_args
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid(resources, outpdf):
    """Check that nonsensical ``--unpaper-args`` produce a child process error.

    Runs ocrmypdf with an ``--unpaper-args`` string that is not a valid set
    of options and expects the return code to equal
    ``ExitCode.child_process_error``. Skipped when ``have_unpaper()`` is
    false.
    """
    cli_flags = [
        "-c",
        "--unpaper-args",
        "unpaper is not going to like these arguments",
        "--plugin",
        "tests/plugins/tesseract_noop.py",
    ]
    proc, _out, _err = run_ocrmypdf(resources / "skew.pdf", outpdf, *cli_flags)

    # Can't tell difference between unpaper choking on bad arguments or some
    # other unpaper failure
    assert proc.returncode == ExitCode.child_process_error