2017-01-26 13:22:01 -08:00
|
|
|
# © 2017 James R. Barlow: github.com/jbarlow83
|
2018-03-14 14:40:48 -07:00
|
|
|
#
|
|
|
|
# This file is part of OCRmyPDF.
|
|
|
|
#
|
|
|
|
# OCRmyPDF is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# OCRmyPDF is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.
|
2017-01-26 13:22:01 -08:00
|
|
|
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import platform
|
|
|
|
|
|
|
|
# Enable the 'helpers_namespace' pytest plugin. Presumably this is what
# provides the pytest.helpers registry used by the
# @pytest.helpers.register decorators in this file -- confirm.
pytest_plugins = ['helpers_namespace']
|
|
|
|
|
|
|
|
import pytest
|
2017-01-26 16:38:59 -08:00
|
|
|
from pathlib import Path
|
|
|
|
from subprocess import Popen, PIPE
|
2017-01-26 13:22:01 -08:00
|
|
|
|
|
|
|
|
2018-06-23 00:54:22 -07:00
|
|
|
# pylint: disable=E1101
|
|
|
|
# pytest.helpers is dynamic so it confuses pylint
|
|
|
|
|
2017-01-26 13:22:01 -08:00
|
|
|
# Abort early on interpreters older than the minimum named in the message.
# Comparing against (3, 4) keeps the check consistent with the text printed
# to the user; testing only the major version would let 3.0-3.3 through.
if sys.version_info < (3, 4):
    print("Requires Python 3.4+")
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.helpers.register
def is_linux():
    """Return True when platform.system() reports 'Linux'."""
    system_name = platform.system()
    return system_name == 'Linux'
|
|
|
|
|
|
|
|
|
2017-03-02 22:27:06 -08:00
|
|
|
@pytest.helpers.register
def is_macos():
    """Return True when platform.system() reports 'Darwin'."""
    system_name = platform.system()
    return system_name == 'Darwin'
|
|
|
|
|
|
|
|
|
2017-01-26 13:22:01 -08:00
|
|
|
@pytest.helpers.register
def running_in_docker():
    """Return True when a Docker marker file exists in the filesystem root."""
    # Docker creates /.dockerenv (newer versions) or /.dockerinit (older
    # versions). This is undocumented behavior, not an official test.
    marker_files = ('/.dockerenv', '/.dockerinit')
    return any(os.path.exists(marker) for marker in marker_files)
|
2017-01-26 16:38:59 -08:00
|
|
|
|
|
|
|
|
2017-03-02 22:27:06 -08:00
|
|
|
@pytest.helpers.register
def running_in_travis():
    """Return True when the TRAVIS environment variable is exactly 'true'."""
    travis_flag = os.environ.get('TRAVIS')
    return travis_flag == 'true'
|
|
|
|
|
|
|
|
|
2017-01-26 16:38:59 -08:00
|
|
|
# Absolute path of the directory containing this file.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))

# Directory holding the replacement programs that spoof() links to.
SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')

# Parent directory of the tests directory.
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)

# Command prefix that runs the ocrmypdf module with the current interpreter.
OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.helpers.register
def spoof(tmpdir_factory, **kwargs):
    """Modify PATH to override subprocess executables.

    Usage: spoof(tmpdir_factory, program1='replacement', ...)

    Creates a temporary directory containing one symlink per keyword
    argument: the link is named after the keyword and points at the file of
    that name inside SPOOF_PATH, which is first made executable (mode 0o755).
    The directory is then placed at the front of PATH in a copy of the
    current environment.

    Args:
        tmpdir_factory: object whose ``mktemp(name)`` returns a new temporary
            directory (used here with the name 'spoofers').
        **kwargs: maps the program name to override to the file name of its
            replacement inside SPOOF_PATH.

    Returns:
        dict: a copy of ``os.environ`` with the spoof directory prepended to
        ``PATH`` and the previous ``PATH`` saved in ``_OCRMYPDF_SAVE_PATH``.

    Raises:
        KeyError: if the current environment has no ``PATH`` entry.
    """
    env = os.environ.copy()

    # Name the link directory after the replacements in use, minus '.py'.
    slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))
    spoofer_base = Path(str(tmpdir_factory.mktemp('spoofers')))
    tmpdir = spoofer_base / slug
    # With no keyword arguments the slug is empty and tmpdir is the directory
    # mktemp() already created, so only create it when it is missing.
    if not tmpdir.exists():
        tmpdir.mkdir(parents=True)

    for replace_program, with_spoof in kwargs.items():
        spoofer = Path(SPOOF_PATH) / with_spoof
        spoofer.chmod(0o755)
        (tmpdir / replace_program).symlink_to(spoofer)

    env['_OCRMYPDF_SAVE_PATH'] = env['PATH']
    # os.pathsep is ':' on POSIX and ';' on Windows; a literal ':' would
    # produce an unusable PATH on the latter.
    env['PATH'] = str(tmpdir) + os.pathsep + env['PATH']

    return env
|
|
|
|
|
|
|
|
|
2018-03-24 15:07:02 -07:00
|
|
|
@pytest.fixture(scope='session')
def spoof_tesseract_noop(tmpdir_factory):
    """Session fixture: environment where 'tesseract' is tesseract_noop.py."""
    replacements = {'tesseract': 'tesseract_noop.py'}
    return spoof(tmpdir_factory, **replacements)
|
2017-05-29 12:47:55 -07:00
|
|
|
|
|
|
|
|
2018-03-24 15:07:02 -07:00
|
|
|
@pytest.fixture(scope='session')
def spoof_tesseract_cache(tmpdir_factory):
    """Session fixture: environment where 'tesseract' is tesseract_cache.py.

    When running_in_docker() is true no spoofing is set up and an unmodified
    copy of the current environment is returned instead.
    """
    if not running_in_docker():
        return spoof(tmpdir_factory, tesseract="tesseract_cache.py")
    return os.environ.copy()
|
2017-05-29 12:47:55 -07:00
|
|
|
|
|
|
|
|
2017-01-26 16:38:59 -08:00
|
|
|
@pytest.fixture
def resources():
    """Return the 'resources' directory under TESTS_ROOT as a Path."""
    return Path(TESTS_ROOT, 'resources')
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def ocrmypdf_exec():
    """Return the module-level OCRMYPDF command list (not a copy)."""
    command = OCRMYPDF
    return command
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="function")
def outdir(tmpdir):
    """Return the per-test ``tmpdir`` converted to a pathlib.Path."""
    tmp_path_text = str(tmpdir)
    return Path(tmp_path_text)
|
2017-01-26 16:38:59 -08:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="function")
def outpdf(tmpdir):
    """Return, as a string, the path 'out.pdf' inside the per-test tmpdir."""
    target = Path(str(tmpdir), 'out.pdf')
    return str(target)
|
2017-01-26 16:38:59 -08:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="function")
def no_outpdf(tmpdir):
    """Return a path for output that the test expects never to be written.

    The fixture only documents that intent. An assertion failure inside a
    fixture is reported as an error rather than a test failure, so nothing
    is checked here; the test itself must confirm that no output file was
    created.
    """
    target = Path(str(tmpdir), 'no_output.pdf')
    return str(target)
|
2017-01-26 16:38:59 -08:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf and confirm that a valid output file was created.

    Asserts that the process exited with status 0, that the output file
    exists and is larger than 100 bytes, and that nothing was written to
    stdout. Returns ``output_file`` unchanged.
    """
    proc, stdout_text, stderr_text = run_ocrmypdf(
        input_file, output_file, *args, env=env)

    # Echo stderr so that py.test collects it; run with -s to view it.
    print(stderr_text, file=sys.stderr)

    assert proc.returncode == 0

    produced = str(output_file)
    assert os.path.exists(produced), "Output file not created"
    assert os.stat(produced).st_size > 100, "PDF too small or empty"

    assert stdout_text == "", (
        "The following was written to stdout and should not have been: \n"
        "<stdout>\n" + stdout_text + "\n</stdout>")
    return output_file
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.helpers.register
def run_ocrmypdf(input_file, output_file, *args, env=None,
                 universal_newlines=True):
    """Run ocrmypdf in a subprocess and let the caller deal with the results.

    The command line is OCRMYPDF, then each of ``args`` converted with
    str(), then the input and output file paths. When ``env`` is None the
    current ``os.environ`` is used.

    Returns:
        tuple: ``(process, stdout, stderr)`` where ``process`` is the
        finished Popen object and the two streams are whatever
        ``communicate()`` returned.
    """
    child_env = os.environ if env is None else env

    command = list(OCRMYPDF)
    command.extend(str(arg) for arg in args)
    command.extend([str(input_file), str(output_file)])

    proc = Popen(
        command, close_fds=True, stdout=PIPE, stderr=PIPE,
        universal_newlines=universal_newlines, env=child_env)
    stdout_data, stderr_data = proc.communicate()

    return proc, stdout_data, stderr_data
|
2017-03-24 13:23:03 -07:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.helpers.register
def first_page_dimensions(pdf):
    """Return ``(width_inches, height_inches)`` of the first page of ``pdf``."""
    # Imported here rather than at module level, as in the original code.
    from ocrmypdf import pdfinfo

    first_page = pdfinfo.PdfInfo(pdf)[0]
    return (first_page.width_inches, first_page.height_inches)
|
2018-08-03 00:42:59 -07:00
|
|
|
|
|
|
|
|
|
|
|
def pytest_addoption(parser):
    """Register the ``--runslow`` flag (off by default) on ``parser``."""
    slow_help = ("run slow tests only useful for development (unlikely to be "
                 "useful for downstream packagers)")
    parser.addoption(
        "--runslow", action="store_true", default=False, help=slow_help)
|
|
|
|
|
|
|
|
|
|
|
|
def pytest_collection_modifyitems(config, items):
    """Mark every item carrying the 'slow' keyword as skipped.

    Nothing is changed when --runslow was given on the command line.
    """
    if not config.getoption("--runslow"):
        slow_marker = pytest.mark.skip(reason="need --runslow option to run")
        for test_item in items:
            if "slow" not in test_item.keywords:
                continue
            test_item.add_marker(slow_marker)
|