OCRmyPDF/tests/conftest.py
James R. Barlow eb5200d26a Change most tests to use ocrmypdf API instead of subprocess
The main benefit of this is code coverage gains can actually follow it.
Also removes most ugly os.environ hacks.
2019-06-03 01:45:27 -07:00

255 lines
6.7 KiB
Python

# © 2017 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
import os
import platform
import sys
from contextlib import contextmanager
from pathlib import Path
from subprocess import PIPE, run
from ocrmypdf import api, cli
import pytest
# Plugins pytest is asked to load for this test suite. The helpers defined
# below are registered through ``pytest.helpers``.
pytest_plugins = ['helpers_namespace']
# pytest-cov is optional: when it can be imported, call its
# cleanup_on_sigterm() hook; when it cannot, carry on without it.
# NOTE(review): presumably this lets coverage data be saved when a process is
# stopped with SIGTERM -- confirm against the pytest-cov documentation.
try:
    from pytest_cov.embed import cleanup_on_sigterm
except ImportError:
    pass
else:
    cleanup_on_sigterm()
# pylint: disable=E1101
# pytest.helpers is dynamic so it confuses pylint
# Abort early on interpreters older than the version named in the message.
# The comparison uses the (major, minor) tuple so that the check actually
# matches the advertised "3.4+" requirement (the previous check only
# rejected Python 2).
if sys.version_info < (3, 4):
    print("Requires Python 3.4+")
    sys.exit(1)
@pytest.helpers.register
def is_linux():
    """Return True when ``platform.system()`` reports ``'Linux'``."""
    system_name = platform.system()
    return system_name == 'Linux'
@pytest.helpers.register
def is_macos():
    """Return True when ``platform.system()`` reports ``'Darwin'``."""
    system_name = platform.system()
    return system_name == 'Darwin'
@pytest.helpers.register
def running_in_docker():
    """Return True when a Docker marker file exists at the filesystem root."""
    # Docker creates a file named /.dockerenv (newer versions) or
    # /.dockerinit (older versions). This is undocumented behavior, not an
    # official test.
    marker_files = ('/.dockerenv', '/.dockerinit')
    return any(os.path.exists(marker) for marker in marker_files)
@pytest.helpers.register
def running_in_travis():
    """Return True when the ``TRAVIS`` environment variable equals ``'true'``."""
    travis_flag = os.getenv('TRAVIS')
    return travis_flag == 'true'
@pytest.helpers.register
def needs_pdfminer(fn):
    """Decorate ``fn`` so it is skipped when ``pdfminer`` cannot be imported.

    Returns ``fn`` unchanged when the import succeeds; otherwise returns
    ``fn`` wrapped in an unconditional ``skipif`` marker.
    """
    try:
        # Imported only to probe availability; the module itself is unused.
        import pdfminer
    except ImportError:
        pdfminer_missing = True
    else:
        pdfminer_missing = False

    if not pdfminer_missing:
        return fn
    skip_marker = pytest.mark.skipif(True, reason="pdfminer not available")
    return skip_marker(fn)
@pytest.helpers.register
def have_unpaper():
    """Return True when ``ocrmypdf.exec.unpaper.version()`` runs without error.

    Any exception raised while importing the wrapper or querying the version
    is treated as "unpaper is not available".
    """
    try:
        from ocrmypdf.exec import unpaper

        unpaper.version()
    except Exception:
        available = False
    else:
        available = True
    return available
# Absolute path of the directory that contains this file.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))
# Directory holding the replacement ("spoof") executables used by spoof().
SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')
# Parent directory of the tests directory.
PROJECT_ROOT = os.path.split(TESTS_ROOT)[0]
# Command prefix that runs ocrmypdf as a module under the current interpreter.
OCRMYPDF = [sys.executable] + ['-m', 'ocrmypdf']
@pytest.helpers.register
def spoof(tmp_path_factory, **kwargs):
    """Build an environment whose PATH overrides subprocess executables.

    Usage: ``spoof(tmp_path_factory, program1='replacement.py', ...)``

    A temporary directory is created and, for every keyword argument, a
    symlink named after the keyword (the program to replace) is placed in it,
    pointing at the file of the given name inside ``SPOOF_PATH``. The target
    file is made executable (mode 0o755).

    Returns a copy of ``os.environ`` in which the temporary directory is
    prepended to ``PATH`` and the previous ``PATH`` is stored under
    ``_OCRMYPDF_SAVE_PATH``. ``os.environ`` itself is not modified.

    Raises ``KeyError`` if ``PATH`` is not set in the environment.
    """
    env = os.environ.copy()
    # Directory name derived from the spoof scripts in use, e.g.
    # "tesseract_noop"; sorted so the name does not depend on kwarg order.
    slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))
    spoofer_base = tmp_path_factory.mktemp('spoofers')
    tmpdir = Path(spoofer_base / slug)
    # exist_ok: with no kwargs the slug is empty and tmpdir is the
    # already-created base directory.
    tmpdir.mkdir(parents=True, exist_ok=True)

    for replace_program, with_spoof in kwargs.items():
        spoofer = Path(SPOOF_PATH) / with_spoof
        spoofer.chmod(0o755)
        (tmpdir / replace_program).symlink_to(spoofer)

    env['_OCRMYPDF_SAVE_PATH'] = env['PATH']
    # os.pathsep instead of a literal ":" so the search path is also
    # well-formed on platforms that use a different separator.
    env['PATH'] = str(tmpdir) + os.pathsep + env['PATH']
    return env
@pytest.helpers.register
@contextmanager
def os_environ(new_env):
    """Temporarily apply ``new_env`` to ``os.environ``.

    On entry every item of ``new_env`` (``None`` is treated as empty) is
    written into ``os.environ``. On exit -- including when the body of the
    ``with`` statement raises -- variables added in the meantime are removed
    and all previous values are restored, after which the result is checked
    against the snapshot taken on entry.

    ``PYTEST_CURRENT_TEST`` is never written, deleted, restored or checked.
    """
    old_env = os.environ.copy()
    if new_env is None:
        new_env = {}
    try:
        for k, v in new_env.items():
            if k != 'PYTEST_CURRENT_TEST':
                os.environ[k] = v
        yield
    finally:
        # Restore in a finally block: without it an exception raised inside
        # the with-body (or while applying new_env) would leave the modified
        # environment in place for everything that runs afterwards.
        new_keys = set(os.environ) - set(old_env)
        for k in new_keys:
            if k != 'PYTEST_CURRENT_TEST':
                del os.environ[k]
        for k, v in old_env.items():
            if k != 'PYTEST_CURRENT_TEST':
                os.environ[k] = v
        # Sanity check that the restoration really matches the snapshot.
        for k, v in os.environ.copy().items():
            if k != 'PYTEST_CURRENT_TEST':
                assert v == old_env[k]
@pytest.fixture(scope='session')
def spoof_tesseract_noop(tmp_path_factory):
    """Session fixture: environment from ``spoof()`` in which ``tesseract``
    is replaced by the ``tesseract_noop.py`` spoof."""
    replacements = {'tesseract': 'tesseract_noop.py'}
    return spoof(tmp_path_factory, **replacements)
@pytest.fixture(scope='session')
def spoof_tesseract_cache(tmp_path_factory):
    """Session fixture: environment from ``spoof()`` in which ``tesseract``
    is replaced by the ``tesseract_cache.py`` spoof.

    When ``running_in_docker()`` is true no spoof is installed and a plain
    copy of ``os.environ`` is returned instead.
    """
    if not running_in_docker():
        return spoof(tmp_path_factory, tesseract="tesseract_cache.py")
    return os.environ.copy()
@pytest.fixture
def resources():
    """Fixture: ``Path`` of the ``resources`` directory under ``TESTS_ROOT``."""
    return Path(TESTS_ROOT).joinpath('resources')
@pytest.fixture
def ocrmypdf_exec():
    """Fixture: the ``OCRMYPDF`` command prefix list.

    The module-level list itself is returned, not a copy.
    """
    command_prefix = OCRMYPDF
    return command_prefix
@pytest.fixture(scope="function")
def outdir(tmp_path):
    """Fixture: output directory for a test; simply the ``tmp_path`` value."""
    output_directory = tmp_path
    return output_directory
@pytest.fixture(scope="function")
def outpdf(tmp_path):
    """Fixture: path ``out.pdf`` inside ``tmp_path`` (the file is not created)."""
    return tmp_path.joinpath('out.pdf')
@pytest.fixture(scope="function")
def no_outpdf(tmp_path):
    """Fixture: path ``no_output.pdf`` inside ``tmp_path``.

    Using this fixture only documents that a test is not expected to produce
    output. An assertion failure inside a fixture is reported as an error
    rather than a test failure, so nothing is checked here; the test itself
    must confirm that no output file was created.
    """
    return tmp_path.joinpath('no_output.pdf')
@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf through its API and confirm that a valid file was created.

    ``input_file``, ``output_file`` and every entry of ``args`` are converted
    to strings and parsed with the ocrmypdf command line parser. Entries of
    ``args`` that are ``None`` are dropped, matching ``run_ocrmypdf()``.

    If ``env`` is given (and non-empty), a copy of it is used as
    ``options.tesseract_env``, with ``_OCRMYPDF_TEST_INFILE`` set to the
    input file path.

    Asserts that the pipeline returns 0 and that the output file exists and
    is larger than 100 bytes. Returns ``output_file``.
    """
    cli_args = [str(input_file), str(output_file)]
    # Skip None so optional arguments can be passed straight through;
    # str(None) would otherwise reach the parser as the literal 'None'.
    cli_args += [str(arg) for arg in args if arg is not None]
    options = cli.parser.parse_args(cli_args)
    api.check_options(options)
    if env:
        # Copy so the caller's mapping (often a session-scoped fixture shared
        # between tests) is not modified.
        options.tesseract_env = dict(env)
        # Environment values must be strings; input_file may be a Path.
        options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = str(input_file)
    result = api.run_pipeline(options, api=True)

    assert result == 0
    assert os.path.exists(str(output_file)), "Output file not created"
    assert os.stat(str(output_file)).st_size > 100, "PDF too small or empty"
    return output_file
@pytest.helpers.register
def run_ocrmypdf(input_file, output_file, *args, env=None, universal_newlines=True):
    """Run ocrmypdf in a subprocess and let the caller deal with the results.

    The command line is ``OCRMYPDF``, then every entry of ``args`` that is
    not ``None`` (converted to ``str``), then the input and output paths.
    When ``env`` is ``None`` the current ``os.environ`` is used.

    Returns ``(completed_process, stdout, stderr)``.
    """
    child_env = os.environ if env is None else env

    command = list(OCRMYPDF)
    command.extend(str(arg) for arg in args if arg is not None)
    command.append(str(input_file))
    command.append(str(output_file))

    proc = run(
        command,
        stdout=PIPE,
        stderr=PIPE,
        universal_newlines=universal_newlines,
        env=child_env,
    )
    return proc, proc.stdout, proc.stderr
@pytest.helpers.register
def first_page_dimensions(pdf):
    """Return ``(width_inches, height_inches)`` for the first page of ``pdf``,
    as reported by ``ocrmypdf.pdfinfo.PdfInfo``."""
    from ocrmypdf import pdfinfo

    first_page = pdfinfo.PdfInfo(pdf)[0]
    width = first_page.width_inches
    height = first_page.height_inches
    return (width, height)
def pytest_addoption(parser):
    """Register the ``--runslow`` flag (a boolean, off by default) on ``parser``."""
    runslow_help = (
        "run slow tests only useful for development (unlikely to be "
        "useful for downstream packagers)"
    )
    parser.addoption(
        "--runslow", action="store_true", default=False, help=runslow_help
    )
def pytest_collection_modifyitems(config, items):
    """Mark every collected item carrying the ``slow`` keyword as skipped,
    unless ``--runslow`` was given on the command line."""
    run_slow = config.getoption("--runslow")
    if not run_slow:
        skip_slow = pytest.mark.skip(reason="need --runslow option to run")
        for slow_item in (item for item in items if "slow" in item.keywords):
            slow_item.add_marker(skip_slow)
    # With --runslow given, slow tests are left untouched.