OCRmyPDF/tests/test_unpaper.py

115 lines
3.6 KiB
Python
Raw Normal View History

2018-03-24 15:21:44 -07:00
# © 2015-17 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
from os import fspath
from unittest.mock import patch
2018-12-30 01:28:15 -08:00
2018-03-24 15:21:44 -07:00
import pytest
from ocrmypdf._plugin_manager import get_parser_options_plugins
from ocrmypdf._validation import check_options
2020-05-02 03:34:31 -07:00
from ocrmypdf.cli import get_parser
from ocrmypdf.exceptions import ExitCode, MissingDependencyError
2018-03-24 15:21:44 -07:00
# pytest.helpers is dynamic: its attributes only exist at runtime, so pylint
# cannot see them and the member check has to be silenced.
# pylint: disable=no-member,redefined-outer-name
# pylint: disable=w0612

# Short module-level aliases for the shared test helpers used below.
_helpers = pytest.helpers
check_ocrmypdf = _helpers.check_ocrmypdf
run_ocrmypdf = _helpers.run_ocrmypdf
spoof = _helpers.spoof
have_unpaper = _helpers.have_unpaper
@pytest.fixture
def spoof_unpaper_oldversion(tmp_path_factory):
    """Build a spoof in which ``unpaper`` maps to ``unpaper_oldversion.py``.

    The value returned by the ``spoof`` helper is passed through unchanged;
    ``test_old_unpaper`` supplies it as the ``env`` argument of
    ``run_ocrmypdf``.
    """
    spoofed_env = spoof(tmp_path_factory, unpaper="unpaper_oldversion.py")
    return spoofed_env
2018-03-24 22:24:14 -07:00
2018-03-24 15:21:44 -07:00
def test_no_unpaper(resources, no_outpdf):
    """``--clean`` must be rejected when the unpaper version probe fails.

    The version lookup is patched to raise ``FileNotFoundError``; option
    validation is then expected to raise ``MissingDependencyError``.
    """
    argv = ["--clean", fspath(resources / "c02-22.pdf"), fspath(no_outpdf)]
    _parser, options, plugin_manager = get_parser_options_plugins(argv)

    # Every call to the patched version function raises, as if the
    # executable could not be found.
    with patch(
        "ocrmypdf.exec.unpaper.version",
        side_effect=FileNotFoundError("unpaper"),
    ), pytest.raises(MissingDependencyError):
        check_options(options, plugin_manager)
2018-03-24 15:21:44 -07:00
def test_old_unpaper(spoof_unpaper_oldversion, resources, no_outpdf):
    """Running ``--clean`` against the spoofed "old version" unpaper must
    exit with ``ExitCode.missing_dependency``.
    """
    source_pdf = resources / "c02-22.pdf"
    # Only the process result matters here; captured output is ignored.
    proc, _out, _err = run_ocrmypdf(
        source_pdf, no_outpdf, "--clean", env=spoof_unpaper_oldversion
    )
    assert proc.returncode == ExitCode.missing_dependency
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_clean(resources, outpdf):
    """Process ``skew.pdf`` with ``-c`` using the no-op Tesseract plugin.

    Skipped when ``have_unpaper()`` reports that unpaper is unavailable.
    """
    cli_args = ("-c", '--plugin', 'tests/plugins/tesseract_noop.py')
    check_ocrmypdf(resources / "skew.pdf", outpdf, *cli_args)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_valid(resources, outpdf):
    """Process ``skew.pdf`` with ``-c`` and a ``--unpaper-args`` value.

    Skipped when ``have_unpaper()`` reports that unpaper is unavailable.
    """
    unpaper_args = "--layout double"  # Spaces required here
    cli_args = (
        "-c",
        "--unpaper-args",
        unpaper_args,
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    check_ocrmypdf(resources / "skew.pdf", outpdf, *cli_args)
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid_filename(resources, outpdf):
    """A path given as ``--unpaper-args`` must be refused as a bad argument.

    Expects the "No filenames allowed" message on stderr and the
    ``ExitCode.bad_args`` return code. Skipped when ``have_unpaper()``
    reports that unpaper is unavailable.
    """
    cli_args = (
        "-c",
        "--unpaper-args",
        "/etc/passwd",
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    proc, _out, stderr_text = run_ocrmypdf(
        resources / "skew.pdf", outpdf, *cli_args
    )
    assert "No filenames allowed" in stderr_text
    assert proc.returncode == ExitCode.bad_args
@pytest.mark.skipif(not have_unpaper(), reason="requires unpaper")
def test_unpaper_args_invalid(resources, outpdf):
    """Nonsense ``--unpaper-args`` must surface as a child process error.

    Skipped when ``have_unpaper()`` reports that unpaper is unavailable.
    """
    bogus_args = "unpaper is not going to like these arguments"
    proc, _out, _err = run_ocrmypdf(
        resources / "skew.pdf",
        outpdf,
        "-c",
        "--unpaper-args",
        bogus_args,
        '--plugin',
        'tests/plugins/tesseract_noop.py',
    )
    # Can't tell difference between unpaper choking on bad arguments or some
    # other unpaper failure
    assert proc.returncode == ExitCode.child_process_error