OCRmyPDF/tests/conftest.py

# © 2017 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

import ast
import os
import platform
import sys
from pathlib import Path
from subprocess import PIPE, run

import pytest

from ocrmypdf import api, cli

# Ask pytest to load the 'helpers_namespace' plugin; presumably this is what
# provides the pytest.helpers registry used by the helper functions below.
pytest_plugins = ['helpers_namespace']


# pylint: disable=E1101
# pytest.helpers is dynamic so it confuses pylint

# Abort the test session early on interpreters older than Python 3.5.
# NOTE(review): this file also uses an f-string (Python 3.6+ syntax), so an
# older interpreter would likely fail to compile the file before reaching
# this guard -- confirm whether the guard is still useful.
if not (sys.version_info >= (3, 5)):
    print("Requires Python 3.5+")
    sys.exit(1)


@pytest.helpers.register
def is_linux():
    """Report whether platform.system() identifies the host as 'Linux'."""
    system_name = platform.system()
    return system_name == 'Linux'


@pytest.helpers.register
def is_macos():
    """Report whether platform.system() identifies the host as 'Darwin'."""
    system_name = platform.system()
    return system_name == 'Darwin'


@pytest.helpers.register
def running_in_docker():
    """Guess whether the tests are executing inside a Docker container.

    Docker creates a marker file at the filesystem root, named /.dockerenv
    (newer versions) or /.dockerinit (older versions). This is undocumented
    behavior, not an official test.
    """
    marker_files = ('/.dockerenv', '/.dockerinit')
    return any(os.path.exists(marker) for marker in marker_files)


@pytest.helpers.register
def running_in_travis():
    """Report whether the TRAVIS environment variable is set to 'true'."""
    travis_flag = os.environ.get('TRAVIS')
    return travis_flag == 'true'


@pytest.helpers.register
def have_unpaper():
    """Report whether unpaper can be imported and asked for its version.

    Any exception raised while importing the wrapper module or while calling
    its version() function is treated as "unpaper is not available".
    """
    try:
        from ocrmypdf.exec import unpaper

        unpaper.version()
    except Exception:
        # Deliberately broad: this is a best-effort availability probe
        available = False
    else:
        available = True
    return available


# Absolute path of the directory that contains this file
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))
# Directory of replacement ("spoof") scripts, located inside TESTS_ROOT
SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')
# Parent directory of TESTS_ROOT
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
# Command prefix that runs ocrmypdf as a module under the current interpreter
OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']


# Source text of the stub script that spoof() writes for each overridden
# program on Windows. The {spoofer} placeholder is filled with the repr() of
# the spoof script's path, so it becomes a Python string literal in the stub.
# The stub runs that script with the current interpreter, relays its stdout,
# stderr and exit code, and forwards any command line arguments.
WINDOWS_SHIM_TEMPLATE = """
# This is a shim for Windows that has the same effect as a symlink to the target .py
# file
import os
import subprocess
import sys

args = [sys.executable, {spoofer}, *sys.argv[1:]]
p = subprocess.run(args, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
sys.stdout.buffer.write(p.stdout)
sys.stderr.buffer.write(p.stderr)
sys.exit(p.returncode)
"""

# Import-time sanity check: the template, rendered with a sample Windows-style
# path, must parse as valid Python.
assert ast.parse(WINDOWS_SHIM_TEMPLATE.format(spoofer=repr(r"C:\\Temp\\file.py")))


@pytest.helpers.register
def spoof(tmp_path_factory, **kwargs):
    r"""Build an environment that overrides subprocess executables

    spoof(tmp_path_factory, program1='replacement', ...)

    For the test suite we need a way to override executables, so that we can
    substitute desired results such as errors or just speed up OCR.

    On POSIXish platforms we create a temporary folder with overrides that
    are symlinks to the executables we want to run. We do not actually override
    PATH. We also set an environment variable _OCRMYPDF_TEST_PATH, which
    OCRmyPDF's subprocess wrapper will check before they use regular PATH. The
    output is a folder full of executables we are overriding. We can override
    multiple executables. The end result is a folder we can use in a PATH-style
    lookup to override some executables:

        /tmp/abcxyz/tesseract -> ocrmypdf/tests/resources/spoof/tesseract_crash.py
        /tmp/abcxyz/gs -> ocrmypdf/tests/resources/spoof/gs_backflip.py

    Windows needs extra help from us because usually, only the Administrator
    can create symlinks. Instead we create small Python scripts that call
    the programs we want, implementing the effect of a symlink. This is cleaner
    than creating Windows executables or trying to use non-Python scripts.
    The temporary folder generated for Windows could look like:

        %TEMP%\abcxyz\tesseract.py:
            (script that runs ocrmypdf/tests/resources/spoof/tesseract_crash.py)
        %TEMP%\abcxyz\gswin32c.py:
            (script that runs ocrmypdf/tests/resources/spoof/gs_backflip.py)
        %TEMP%\abcxyz\gswin64c.py:
            (script that runs ocrmypdf/tests/resources/spoof/gs_backflip.py)

    We also address one quirk here, that Ghostscript may be known as gswin32c
    or gswin64c, depending on what the user installed (regardless of Windows
    itself). On POSIX, Ghostscript is just 'gs'.  We handle the special case here
    too.

    All of this is intimately dependent on the machinery in ocrmypdf.exec.run().
    In particular, for Windows, that code has to know that if there is a .py
    file, it needs to run it with Python, since Windows does not like being
    asked to execute files.

    We don't overload PATH directly because we have some tests where we call
    ocrmypdf as a subprocess (to exercise the command line interface) and some
    tests where we call it as an API.

    Args:
        tmp_path_factory: object whose mktemp() creates the base folder that
            will hold the overrides (pytest's tmp_path_factory fixture).
        **kwargs: maps the name of the program to replace to the file name of
            the replacement script inside SPOOF_PATH.

    Returns:
        A copy of os.environ with _OCRMYPDF_TEST_PATH set to the override
        folder followed by the current PATH.

    Raises:
        EnvironmentError: on Windows, if PATHEXT is missing or does not
            include .py.
    """
    # This docstring is a raw string so that the backslashes in the Windows
    # example paths are not interpreted as escape sequences.
    env = os.environ.copy()
    slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))
    spoofer_base = tmp_path_factory.mktemp('spoofers')
    tmpdir = Path(spoofer_base / slug)
    # With no overrides the slug is empty and tmpdir is spoofer_base itself,
    # which already exists; exist_ok keeps that case from raising.
    tmpdir.mkdir(parents=True, exist_ok=True)

    for replace_program, with_spoof in kwargs.items():
        spoofer = Path(SPOOF_PATH) / with_spoof
        if os.name != 'nt':
            # The symlink target has to be executable to stand in for a program
            spoofer.chmod(0o755)
            (tmpdir / replace_program).symlink_to(spoofer)
        else:
            py_file = WINDOWS_SHIM_TEMPLATE.format(
                spoofer=repr(os.fspath(spoofer.absolute()))
            )
            if replace_program == 'gs':
                # Ghostscript goes by either of these names on Windows
                programs = ['gswin64c', 'gswin32c']
            else:
                programs = [replace_program]
            for prog in programs:
                (tmpdir / f'{prog}.py').write_text(py_file, encoding='utf-8')

    env['_OCRMYPDF_TEST_PATH'] = str(tmpdir) + os.pathsep + env['PATH']
    if os.name == 'nt':
        # A missing PATHEXT is reported the same way as one lacking .py,
        # rather than surfacing as a bare KeyError.
        if '.py' not in env.get('PATHEXT', '').lower():
            raise EnvironmentError("PATHEXT is not configured to support .py")
    return env


@pytest.fixture
def spoof_tesseract_noop(tmp_path_factory):
    """Provide an environment where 'tesseract' is spoofed by tesseract_noop.py."""
    overrides = {'tesseract': 'tesseract_noop.py'}
    return spoof(tmp_path_factory, **overrides)


@pytest.fixture
def spoof_tesseract_cache(tmp_path_factory):
    """Provide an environment where 'tesseract' is spoofed by tesseract_cache.py.

    When running inside Docker no spoofing is set up and an unmodified copy of
    os.environ is returned instead.
    """
    if not running_in_docker():
        return spoof(tmp_path_factory, tesseract="tesseract_cache.py")
    return os.environ.copy()


@pytest.fixture
def resources():
    """Provide the path of the 'resources' folder inside TESTS_ROOT."""
    return Path(TESTS_ROOT, 'resources')


@pytest.fixture
def ocrmypdf_exec():
    """Provide the module-level OCRMYPDF command prefix list (not a copy)."""
    command_prefix = OCRMYPDF
    return command_prefix


@pytest.fixture(scope="function")
def outdir(tmp_path):
    """Provide the test's own temporary directory as its output folder."""
    output_folder = tmp_path
    return output_folder


@pytest.fixture(scope="function")
def outpdf(tmp_path):
    """Provide the path 'out.pdf' inside the test's temporary directory."""
    return tmp_path.joinpath('out.pdf')


@pytest.fixture(scope="function")
def no_outpdf(tmp_path):
    """Provide an output path for tests that should not produce any output.

    The fixture only documents that expectation and performs no check itself:
    an assertion failure inside a fixture is reported as an error rather than
    a test failure. The test must confirm that no output file was created.
    """
    return tmp_path.joinpath('no_output.pdf')


@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf and confirm that a valid file was created

    Args:
        input_file: path of the input file.
        output_file: path where the output file is expected.
        *args: extra command line arguments; each is converted with str() and
            None entries are dropped.
        env: optional environment mapping. When given and non-empty, a copy
            of it (plus _OCRMYPDF_TEST_INFILE) becomes options.tesseract_env.

    Returns:
        output_file, unchanged.

    Raises:
        AssertionError: if the pipeline result is not 0, the output file does
            not exist, or it is 100 bytes or smaller.
    """

    options = cli.get_parser().parse_args(
        [str(input_file), str(output_file)]
        + [str(arg) for arg in args if arg is not None]
    )
    api.check_options(options)
    if env:
        # Work on a copy so the caller's mapping is not modified when
        # _OCRMYPDF_TEST_INFILE is added (same approach as run_ocrmypdf_api)
        options.tesseract_env = env.copy()
        # os.fspath: environment values must be str, not pathlib.Path
        options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = os.fspath(input_file)
    result = api.run_pipeline(options, plugin_manager=None, api=True)

    assert result == 0
    assert os.path.exists(str(output_file)), "Output file not created"
    assert os.stat(str(output_file)).st_size > 100, "PDF too small or empty"

    return output_file


@pytest.helpers.register
def run_ocrmypdf_api(input_file, output_file, *args, env=None):
    """Run ocrmypdf via API and let caller deal with results

    Does not currently have a way to manipulate the PATH except for Tesseract.

    Args:
        input_file: path of the input file.
        output_file: path of the output file.
        *args: extra command line arguments; each is converted with str() and
            None entries are dropped.
        env: optional environment mapping. When given and non-empty, a copy
            of it (plus _OCRMYPDF_TEST_INFILE) becomes options.tesseract_env.

    Returns:
        Whatever api.run_pipeline() returns.

    Raises:
        AssertionError: if env points at a spoof folder that overrides
            something other than tesseract, or if options.tesseract_env holds
            a value that is neither str nor bytes.
    """

    options = cli.get_parser().parse_args(
        [str(input_file), str(output_file)]
        + [str(arg) for arg in args if arg is not None]
    )
    api.check_options(options)
    if env:
        options.tesseract_env = env.copy()
        options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = os.fspath(input_file)
        first_path = env.get('_OCRMYPDF_TEST_PATH', '').split(os.pathsep)[0]
        if 'spoof' in first_path:
            # Only the last path component (the slug that spoof() builds from
            # the replacement script names) says which programs are spoofed.
            # Checking the whole path would misfire whenever an unrelated
            # component, such as a user name, happens to contain "gs".
            spoof_slug = Path(first_path).name
            assert 'gs' not in spoof_slug, "use run_ocrmypdf() for gs"
            assert 'tesseract' in spoof_slug
    if options.tesseract_env:
        # Windows is strict about environment values being strings
        assert all(isinstance(v, (str, bytes)) for v in options.tesseract_env.values())

    return api.run_pipeline(options, plugin_manager=None, api=False)


@pytest.helpers.register
def run_ocrmypdf(input_file, output_file, *args, env=None, universal_newlines=True):
    """Run ocrmypdf as a subprocess and let caller deal with results

    Args:
        input_file: path of the input file.
        output_file: path of the output file.
        *args: extra command line arguments, placed before the two file
            arguments; each is converted with str() and None entries are
            dropped.
        env: optional environment mapping for the subprocess; defaults to a
            copy of os.environ. The mapping passed in is not modified.
        universal_newlines: forwarded to subprocess.run().

    Returns:
        A tuple (p, p.stdout, p.stderr) where p is the object returned by
        subprocess.run().

    Raises:
        AssertionError: if the .coveragerc file is not found two directories
            above this file's location.
    """

    # Always work on a private dict so that adding COVERAGE_PROCESS_START
    # below never leaks into a mapping owned by the caller
    env = os.environ.copy() if env is None else dict(env)

    p_args = (
        OCRMYPDF
        + [str(arg) for arg in args if arg is not None]
        + [str(input_file), str(output_file)]
    )

    # Tell subprocess where to find coverage.py configuration
    # This has no effect except when coverage is running
    # Details: https://coverage.readthedocs.io/en/coverage-5.0/subprocess.html
    coverage_rc = Path(__file__).parent.parent / '.coveragerc'
    assert coverage_rc.exists()
    env['COVERAGE_PROCESS_START'] = os.fspath(coverage_rc)

    p = run(
        p_args, stdout=PIPE, stderr=PIPE, universal_newlines=universal_newlines, env=env
    )
    return p, p.stdout, p.stderr


@pytest.helpers.register
def first_page_dimensions(pdf):
    """Return the first page's (width_inches, height_inches) as read by PdfInfo."""
    from ocrmypdf import pdfinfo

    first_page = pdfinfo.PdfInfo(pdf)[0]
    width = first_page.width_inches
    height = first_page.height_inches
    return (width, height)


def pytest_addoption(parser):
    """Register the --runslow flag (a store_true option, off by default)."""
    runslow_help = (
        "run slow tests only useful for development (unlikely to be "
        "useful for downstream packagers)"
    )
    parser.addoption(
        "--runslow", action="store_true", default=False, help=runslow_help
    )


def pytest_collection_modifyitems(config, items):
    """Mark every item carrying the "slow" keyword as skipped.

    Nothing is changed when the --runslow option was given on the command line.
    """
    run_slow_requested = config.getoption("--runslow")
    if not run_slow_requested:
        slow_skip_marker = pytest.mark.skip(reason="need --runslow option to run")
        for slow_item in (entry for entry in items if "slow" in entry.keywords):
            slow_item.add_marker(slow_skip_marker)
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`# © 2017 James R. Barlow: github.com/jbarlow83`
Add license notice to all files Source files to GPL3 Exceptions: -tests/spoof/* to MIT -hocrtransform.py -_unicodefun.py Test resources to CC BY-SA 4.0 except when otherwise noted. Add GPL license. 2018-03-14 14:40:48 -07:00			`#`
			`# This file is part of OCRmyPDF.`
			`#`
			`# OCRmyPDF is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# OCRmyPDF is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00
Document function of symlink shim 2019-12-06 15:00:12 -08:00			`import ast`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`import os`
			`import platform`
Sort imports with isort 2018-12-30 01:28:15 -08:00			`import sys`
Set up code coverage (it works with multiprocessing now!) 2018-11-02 00:31:50 -07:00			`from pathlib import Path`
Convert most uses of subprocess.Popen to subprocess.run in test suite 2019-03-05 22:25:22 -08:00			`from subprocess import PIPE, run`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00
			`import pytest`

Sort imports 2019-12-19 15:29:56 -08:00			`from ocrmypdf import api, cli`

Sort imports with isort 2018-12-30 01:28:15 -08:00			`pytest_plugins = ['helpers_namespace']`

Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00
Fix several pylint errors and warnings 2018-06-23 00:54:22 -07:00			`# pylint: disable=E1101`
			`# pytest.helpers is dynamic so it confuses pylint`

Fix some error messages that printed directly to sys.stderr instead of logging 2019-06-05 03:07:48 -07:00			`if sys.version_info < (3, 5):`
			`print("Requires Python 3.5+")`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`sys.exit(1)`


			`@pytest.helpers.register`
			`def is_linux():`
			`return platform.system() == 'Linux'`


Improvements to macOS test and work on homebrew tap autobrew Squashed commits: [3f06c1e] Try setting up homebrew tap autobuilding [01532f1] Strict mode error in brew 2017-03-02 22:27:06 -08:00			`@pytest.helpers.register`
			`def is_macos():`
			`return platform.system() == 'Darwin'`


Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`@pytest.helpers.register`
			`def running_in_docker():`
Fix running_in_docker() check failing on newer Docker This test has to work to ensure spoof/tesseract_cache.py has a writable directory to put cache into. Otherwise those tests fail. 2017-02-13 02:16:06 -08:00			`# Docker creates a file named /.dockerenv (newer versions) or`
			`# /.dockerinit (older) -- this is undocumented, not an offical test`
			`return os.path.exists('/.dockerenv') or os.path.exists('/.dockerinit')`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

Improvements to macOS test and work on homebrew tap autobrew Squashed commits: [3f06c1e] Try setting up homebrew tap autobuilding [01532f1] Strict mode error in brew 2017-03-02 22:27:06 -08:00			`@pytest.helpers.register`
			`def running_in_travis():`
			`return os.environ.get('TRAVIS') == 'true'`


Fix test suite so --clean is not requested when unpaper is not installed 2019-03-05 22:33:13 -08:00			`@pytest.helpers.register`
			`def have_unpaper():`
			`try:`
			`from ocrmypdf.exec import unpaper`

			`unpaper.version()`
			`except Exception:`
			`return False`
			`return True`


Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))`
			`SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')`
			`PROJECT_ROOT = os.path.dirname(TESTS_ROOT)`
			`OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']`


Document function of symlink shim 2019-12-06 15:00:12 -08:00			`WINDOWS_SHIM_TEMPLATE = """`
			`# This is a shim for Windows that has the same effect as a symlink to the target .py`
			`# file`
Use _OCRMYPDF_TEST_PATH for testing and .py stubs to simulate symlinks 2019-11-28 16:40:04 -08:00			`import os`
			`import subprocess`
			`import sys`

			`args = [sys.executable, {spoofer}, *sys.argv[1:]]`
			`p = subprocess.run(args, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)`
			`sys.stdout.buffer.write(p.stdout)`
			`sys.stderr.buffer.write(p.stderr)`
			`sys.exit(p.returncode)`
			`"""`

Document function of symlink shim 2019-12-06 15:00:12 -08:00			`assert ast.parse(WINDOWS_SHIM_TEMPLATE.format(spoofer=repr(r"C:\\Temp\\file.py")))`

Use _OCRMYPDF_TEST_PATH for testing and .py stubs to simulate symlinks 2019-11-28 16:40:04 -08:00
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`@pytest.helpers.register`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def spoof(tmp_path_factory, **kwargs):`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`"""Modify PATH to override subprocess executables`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
Expand documentation for subprocess.run() from test 2020-03-04 13:37:44 -08:00			`spoof(tmp_path_factory, program1='replacement', ...)`

			`For the test suite we need a way override executables, so that we can`
			`substitute desired results such as errors or just speed up OCR.`

			`On POSIXish platforms we create a temporary folder with overrides that`
			`are symlinks to the executables we want to run. We do not actually override`
			`PATH. We also set an environment variable _OCRMYPDF_TEST_PATH, which`
			`OCRmyPDF's subprocess wrapper will check before they use regular PATH. The`
			`output is a folder full of executables we are overriding. We can override`
			`multiple executables. The end result is a folder we can use in a PATH-style`
			`lookup to override some executables:`

			`/tmp/abcxyz/tesseract -> ocrmypdf/tests/resources/spoof/tesseract_crash.py`
			`/tmp/abcxyz/gs -> ocrmypdf/tests/resources/spoof/gs_backflip.py`

			`Windows needs extra help from us because usually, only the Administrator`
			`can create symlinks. Instead we create small Python scripts that call`
			`the programs we want, implementing the effect of a symlink. This is cleaner`
			`than creating Windows executables or trying to use non-Python scripts.`
			`The temporary folder generated for Windows could like:`

			`%TEMP%\abcxyz\tesseract.py:`
			`(script that runs ocrmypdf/tests/resources/spoof/tesseract_crash.py)`
			`%TEMP%\abcxyz\gswin32c.py:`
			`(script that runs ocrmypdf/tests/resources/spoof/gs_backflip.py)`
			`%TEMP%\abcxyz\gswin64c.py:`
			`(script that runs ocrmypdf/tests/resources/spoof/gs_backflip.py)`

			`We also address one quirk here, that Ghostscript may be known as gswin32c`
			`or gswin64c, depending on what the user installed (regardless of Windows`
			`itself). On POSIX, Ghostscript is just 'gs'. We handle the special case here`
			`too.`

			`All of this is intimately dependent on the machinery in ocrmypdf.exec.run().`
			`In particular, for Windows, that code has to know that if there is a .py`
			`file, it needs to run it with Python, since Windows does not like being`
			`asked to execute files.`

			`We don't overload PATH directly because we have some tests where we call`
			`ocrmypdf as a subprocess (to exercise the command line interface) and some`
			`tests where we call it as an API.`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`"""`
			`env = os.environ.copy()`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`spoofer_base = tmp_path_factory.mktemp('spoofers')`
			`tmpdir = Path(spoofer_base / slug)`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`tmpdir.mkdir(parents=True)`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
			`for replace_program, with_spoof in kwargs.items():`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`spoofer = Path(SPOOF_PATH) / with_spoof`
Use _OCRMYPDF_TEST_PATH for testing and .py stubs to simulate symlinks 2019-11-28 16:40:04 -08:00			`if os.name != 'nt':`
			`spoofer.chmod(0o755)`
			`(tmpdir / replace_program).symlink_to(spoofer)`
			`else:`
Document function of symlink shim 2019-12-06 15:00:12 -08:00			`py_file = WINDOWS_SHIM_TEMPLATE.format(`
			`spoofer=repr(os.fspath(spoofer.absolute()))`
Use _OCRMYPDF_TEST_PATH for testing and .py stubs to simulate symlinks 2019-11-28 16:40:04 -08:00			`)`
			`if replace_program == 'gs':`
			`programs = ['gswin64c', 'gswin32c']`
			`else:`
			`programs = [replace_program]`
			`for prog in programs:`
			`(tmpdir / f'{prog}.py').write_text(py_file, encoding='utf-8')`

			`env['_OCRMYPDF_TEST_PATH'] = str(tmpdir) + os.pathsep + env['PATH']`
			`if os.name == 'nt':`
			`if '.py' not in env['PATHEXT'].lower():`
			`raise EnvironmentError("PATHEXT is not configured to support .py")`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`return env`


Remove session scope from fixtures pytest seems to prepare os.environ in complex ways, so we want to ensure these fixtures are not reused. 2019-12-31 17:09:23 -08:00			`@pytest.fixture`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def spoof_tesseract_noop(tmp_path_factory):`
			`return spoof(tmp_path_factory, tesseract='tesseract_noop.py')`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00

Remove session scope from fixtures pytest seems to prepare os.environ in complex ways, so we want to ensure these fixtures are not reused. 2019-12-31 17:09:23 -08:00			`@pytest.fixture`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def spoof_tesseract_cache(tmp_path_factory):`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00			`if running_in_docker():`
			`return os.environ.copy()`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`return spoof(tmp_path_factory, tesseract="tesseract_cache.py")`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00

Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`@pytest.fixture`
			`def resources():`
			`return Path(TESTS_ROOT) / 'resources'`


			`@pytest.fixture`
			`def ocrmypdf_exec():`
			`return OCRMYPDF`


			`@pytest.fixture(scope="function")`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def outdir(tmp_path):`
			`return tmp_path`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.fixture(scope="function")`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def outpdf(tmp_path):`
			`return tmp_path / 'out.pdf'`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.fixture(scope="function")`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`def no_outpdf(tmp_path):`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`"""This just documents the fact that a test is not expected to produce`
			`output. Unfortunately an assertion failure inside a test fixture produces`
			`an error rather than a test failure, so no testing is done. It's up to`
			`the test to confirm that no output file was created."""`
Use newer pytest tmp_path API 2019-06-01 01:55:51 -07:00			`return tmp_path / 'no_output.pdf'`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.helpers.register`
			`def check_ocrmypdf(input_file, output_file, *args, env=None):`
unpaper-args: add test case and harden feature 2019-02-07 16:21:02 -08:00			`"""Run ocrmypdf and confirmed that a valid file was created"""`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
Support plugin invocation with API 2020-05-02 03:34:31 -07:00			`options = cli.get_parser().parse_args(`
tests: fix interpretation of None as omitted argument 2019-08-11 16:16:10 -07:00			`[str(input_file), str(output_file)]`
			`+ [str(arg) for arg in args if arg is not None]`
Change most tests to use ocrmypdf API instead of subprocess The main benefit of this is code coverage gains can actually follow it. Also removes most ugly os.environ hacks. 2019-06-03 01:45:27 -07:00			`)`
			`api.check_options(options)`
			`if env:`
			`options.tesseract_env = env`
Fix TypeError "environment can only contain strings" Apparently Windows Python doesn't coerce pathlib.Path to str. 2019-11-19 18:01:10 -08:00			`options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = os.fspath(input_file)`
Support plugin invocation with API 2020-05-02 03:34:31 -07:00			`result = api.run_pipeline(options, plugin_manager=None, api=True)`
Change most tests to use ocrmypdf API instead of subprocess The main benefit of this is code coverage gains can actually follow it. Also removes most ugly os.environ hacks. 2019-06-03 01:45:27 -07:00
			`assert result == 0`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`assert os.path.exists(str(output_file)), "Output file not created"`
			`assert os.stat(str(output_file)).st_size > 100, "PDF too small or empty"`
Change most tests to use ocrmypdf API instead of subprocess The main benefit of this is code coverage gains can actually follow it. Also removes most ugly os.environ hacks. 2019-06-03 01:45:27 -07:00
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`return output_file`


test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00			`@pytest.helpers.register`
			`def run_ocrmypdf_api(input_file, output_file, *args, env=None):`
test: environment warnings/cleanup 2019-12-30 22:38:50 -08:00			`"""Run ocrmypdf via API and let caller deal with results`

			`Does not currently have a way to manipulate the PATH except for Tesseract.`
			`"""`
test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00
Support plugin invocation with API 2020-05-02 03:34:31 -07:00			`options = cli.get_parser().parse_args(`
test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00			`[str(input_file), str(output_file)]`
			`+ [str(arg) for arg in args if arg is not None]`
			`)`
			`api.check_options(options)`
			`if env:`
Use _OCRMYPDF_TEST_PATH for testing and .py stubs to simulate symlinks 2019-11-28 16:40:04 -08:00			`options.tesseract_env = env.copy()`
test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00			`options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = os.fspath(input_file)`
test: environment warnings/cleanup 2019-12-30 22:38:50 -08:00			`first_path = env.get('_OCRMYPDF_TEST_PATH', '').split(os.pathsep)[0]`
			`if 'spoof' in first_path:`
			`assert 'gs' not in first_path, "use run_ocrmypdf() for gs"`
			`assert 'tesseract' in first_path`
Enforce str-only environment for Windows since it's more strict 2019-11-28 14:44:32 -08:00			`if options.tesseract_env:`
			`assert all(isinstance(v, (str, bytes)) for v in options.tesseract_env.values())`
test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00
Support plugin invocation with API 2020-05-02 03:34:31 -07:00			`return api.run_pipeline(options, plugin_manager=None, api=False)`
test: Replace many instances of run_ocrmypdf in subprocess with inline 2019-11-28 14:03:18 -08:00

Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`@pytest.helpers.register`
Reformat with black 2018-12-30 01:27:49 -08:00			`def run_ocrmypdf(input_file, output_file, *args, env=None, universal_newlines=True):`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`"Run ocrmypdf and let caller deal with results"`

			`if env is None:`
Try to set up subprocess coverage better 2019-12-31 15:39:45 -08:00			`env = os.environ.copy()`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
Convert most uses of subprocess.Popen to subprocess.run in test suite 2019-03-05 22:25:22 -08:00			`p_args = (`
			`OCRMYPDF`
			`+ [str(arg) for arg in args if arg is not None]`
			`+ [str(input_file), str(output_file)]`
Reformat with black 2018-12-30 01:27:49 -08:00			`)`
Try to set up subprocess coverage better 2019-12-31 15:39:45 -08:00
			`# Tell subprocess where to find coverage.py configuration`
			`# This has no effect except when coverage is running`
			`# Details: https://coverage.readthedocs.io/en/coverage-5.0/subprocess.html`
			`coverage_rc = Path(__file__).parent.parent / '.coveragerc'`
			`assert coverage_rc.exists()`
			`env['COVERAGE_PROCESS_START'] = os.fspath(coverage_rc)`

Convert most uses of subprocess.Popen to subprocess.run in test suite 2019-03-05 22:25:22 -08:00			`p = run(`
			`p_args, stdout=PIPE, stderr=PIPE, universal_newlines=universal_newlines, env=env`
			`)`
			`# print(p.stderr)`
			`return p, p.stdout, p.stderr`
Fix issue #147: unpaper loses DPI information, affects —pdf-renderer tess4 2017-03-24 13:23:03 -07:00

			`@pytest.helpers.register`
			`def first_page_dimensions(pdf):`
Rename pageinfo to pdfinfo 2017-05-19 15:48:23 -07:00			`from ocrmypdf import pdfinfo`
Reformat with black 2018-12-30 01:27:49 -08:00
Rename pageinfo to pdfinfo 2017-05-19 15:48:23 -07:00			`info = pdfinfo.PdfInfo(pdf)`
Fix issue #147: unpaper loses DPI information, affects —pdf-renderer tess4 2017-03-24 13:23:03 -07:00			`page0 = info[0]`
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`return (page0.width_inches, page0.height_inches)`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00

			`def pytest_addoption(parser):`
			`parser.addoption(`
Reformat with black 2018-12-30 01:27:49 -08:00			`"--runslow",`
			`action="store_true",`
			`default=False,`
			`help=(`
			`"run slow tests only useful for development (unlikely to be "`
			`"useful for downstream packagers)"`
			`),`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00			`)`


			`def pytest_collection_modifyitems(config, items):`
			`if config.getoption("--runslow"):`
			`# --runslow given in cli: do not skip slow tests`
			`return`
			`skip_slow = pytest.mark.skip(reason="need --runslow option to run")`
			`for item in items:`
			`if "slow" in item.keywords:`
			`item.add_marker(skip_slow)`