# OCRmyPDF/tests/conftest.py

# © 2017 James R. Barlow: github.com/jbarlow83
#
# This file is part of OCRmyPDF.
#
# OCRmyPDF is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OCRmyPDF is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.

import sys
import os
import platform

pytest_plugins = ['helpers_namespace']

import pytest
from pathlib import Path
from subprocess import Popen, PIPE


# pylint: disable=E1101
# pytest.helpers is dynamic so it confuses pylint

# Abort early on interpreters older than the advertised minimum.  The full
# (major, minor) pair is compared so that the check agrees with the message:
# testing only the major version would let Python 3.0-3.3 through.
if sys.version_info < (3, 4):
    print("Requires Python 3.4+")
    sys.exit(1)


@pytest.helpers.register
def is_linux():
    """Report whether ``platform.system()`` identifies the host as Linux."""
    system_name = platform.system()
    return system_name == 'Linux'


@pytest.helpers.register
def is_macos():
    """Report whether ``platform.system()`` identifies the host as Darwin."""
    system_name = platform.system()
    return system_name == 'Darwin'


@pytest.helpers.register
def running_in_docker():
    """Guess whether the tests are executing inside a Docker container.

    Docker creates a marker file at the filesystem root: ``/.dockerenv``
    (newer versions) or ``/.dockerinit`` (older versions).  This is
    undocumented behavior, so it is not an official test.
    """
    marker_files = ('/.dockerenv', '/.dockerinit')
    return any(os.path.exists(marker) for marker in marker_files)


@pytest.helpers.register
def running_in_travis():
    """Report whether the ``TRAVIS`` environment variable is exactly 'true'."""
    travis_flag = os.environ.get('TRAVIS')
    return travis_flag == 'true'


# Absolute path of the directory that contains this file.
TESTS_ROOT = os.path.dirname(os.path.abspath(__file__))
# Directory under TESTS_ROOT holding the replacement ("spoof") executables.
SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')
# Parent directory of TESTS_ROOT.
PROJECT_ROOT = os.path.split(TESTS_ROOT)[0]
# argv prefix that runs ocrmypdf as a module under the current interpreter.
OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']


@pytest.helpers.register
def spoof(tmpdir_factory, **kwargs):
    """Modify PATH to override subprocess executables

    spoof(program1='replacement', ...)

    Creates a temporary directory holding one symlink per keyword argument.
    Each link is named after the program to override and points at the
    replacement script of the given filename inside SPOOF_PATH.  That
    directory is then placed at the front of PATH in a copy of the current
    environment.

    :param tmpdir_factory: object whose ``mktemp(name)`` returns a fresh
        directory (the session fixtures in this file pass pytest's
        ``tmpdir_factory``)
    :param kwargs: mapping of program name to spoof script filename
    :return: copy of ``os.environ`` with the spoof directory prepended to
        ``PATH`` and the previous ``PATH`` saved in ``_OCRMYPDF_SAVE_PATH``
    :raises KeyError: if ``PATH`` is not set in the environment
    """
    env = os.environ.copy()
    # Directory name is derived from the spoof scripts in use, minus ".py"
    slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))
    spoofer_base = Path(str(tmpdir_factory.mktemp('spoofers')))
    tmpdir = spoofer_base / slug
    tmpdir.mkdir(parents=True)

    for replace_program, with_spoof in kwargs.items():
        spoofer = Path(SPOOF_PATH) / with_spoof
        # Make sure the script itself is executable before linking to it
        spoofer.chmod(0o755)
        (tmpdir / replace_program).symlink_to(spoofer)

    env['_OCRMYPDF_SAVE_PATH'] = env['PATH']
    # Use the platform's PATH separator instead of assuming ":"
    env['PATH'] = str(tmpdir) + os.pathsep + env['PATH']

    return env


@pytest.fixture(scope='session')
def spoof_tesseract_noop(tmpdir_factory):
    """Session-scoped environment from ``spoof()`` in which ``tesseract``
    is overridden by the ``tesseract_noop.py`` spoof script."""
    spoofed_env = spoof(tmpdir_factory, tesseract='tesseract_noop.py')
    return spoofed_env


@pytest.fixture(scope='session')
def spoof_tesseract_cache(tmpdir_factory):
    """Session-scoped environment in which ``tesseract`` is overridden by
    the ``tesseract_cache.py`` spoof script, except when running in Docker.

    When ``running_in_docker()`` is true no spoof is installed and a plain
    copy of the current environment is returned instead.
    """
    if not running_in_docker():
        return spoof(tmpdir_factory, tesseract="tesseract_cache.py")
    return os.environ.copy()


@pytest.fixture
def resources():
    """Path of the ``resources`` directory directly beneath TESTS_ROOT."""
    return Path(TESTS_ROOT).joinpath('resources')


@pytest.fixture
def ocrmypdf_exec():
    """Provide the module-level OCRMYPDF command list (the same list object,
    not a copy)."""
    command_prefix = OCRMYPDF
    return command_prefix


@pytest.fixture(scope="function")
def outdir(tmpdir):
    """Per-test temporary directory, converted to a ``pathlib.Path``."""
    # Go through str() to convert the tmpdir object into a pathlib.Path
    tmpdir_text = str(tmpdir)
    return Path(tmpdir_text)


@pytest.fixture(scope="function")
def outpdf(tmpdir):
    """String path of ``out.pdf`` inside the per-test temporary directory."""
    target = Path(str(tmpdir)).joinpath('out.pdf')
    return str(target)


@pytest.fixture(scope="function")
def no_outpdf(tmpdir):
    """String path of ``no_output.pdf`` inside the per-test temporary
    directory.

    Using this fixture only documents that a test is not expected to
    produce output.  An assertion failure inside a fixture is reported as an
    error rather than a test failure, so nothing is checked here; the test
    itself must confirm that no output file was created.
    """
    target = Path(str(tmpdir)).joinpath('no_output.pdf')
    return str(target)


@pytest.helpers.register
def check_ocrmypdf(input_file, output_file, *args, env=None):
    """Run ocrmypdf and confirm that a plausible output file was created.

    The extra positional ``args`` and ``env`` are forwarded unchanged to
    ``run_ocrmypdf()``.  Asserts that the process exited with status 0, that
    ``output_file`` exists and is larger than 100 bytes, and that nothing
    was written to stdout.  Returns ``output_file`` as it was passed in.
    """
    proc, stdout_text, stderr_text = run_ocrmypdf(
        input_file, output_file, *args, env=env)

    # Echo the child's stderr so py.test collects it; use -s to view
    print(stderr_text, file=sys.stderr)

    produced = str(output_file)
    assert proc.returncode == 0
    assert os.path.exists(produced), "Output file not created"
    assert os.path.getsize(produced) > 100, "PDF too small or empty"
    assert stdout_text == "", (
        "The following was written to stdout and should not have been: \n"
        + "<stdout>\n" + stdout_text + "\n</stdout>")
    return output_file


@pytest.helpers.register
def run_ocrmypdf(input_file, output_file, *args, env=None,
        universal_newlines=True):
    """Run ocrmypdf in a subprocess and let the caller deal with the results.

    The command line is OCRMYPDF, then each of ``args`` converted with
    ``str()``, then the input and output paths.  When ``env`` is None the
    current ``os.environ`` is used.  ``universal_newlines`` is passed
    straight through to ``Popen``.

    Returns ``(process, stdout, stderr)`` after the process has finished.
    """
    child_env = os.environ if env is None else env

    command = list(OCRMYPDF)
    command.extend(str(arg) for arg in args)
    command.append(str(input_file))
    command.append(str(output_file))

    proc = Popen(
        command, stdout=PIPE, stderr=PIPE, close_fds=True,
        env=child_env, universal_newlines=universal_newlines)
    captured_out, captured_err = proc.communicate()

    return proc, captured_out, captured_err


@pytest.helpers.register
def first_page_dimensions(pdf):
    """Return ``(width_inches, height_inches)`` for the first page of *pdf*,
    as reported by ``ocrmypdf.pdfinfo.PdfInfo``."""
    # Imported at call time rather than when this conftest module is loaded
    from ocrmypdf import pdfinfo

    document_info = pdfinfo.PdfInfo(pdf)
    first_page = document_info[0]
    return first_page.width_inches, first_page.height_inches


def pytest_addoption(parser):
    """Register the ``--runslow`` flag (a store_true option, off by default)
    on the given option parser."""
    runslow_help = (
        "run slow tests only useful for development (unlikely to be "
        "useful for downstream packagers)"
    )
    parser.addoption(
        "--runslow", action="store_true", default=False, help=runslow_help)


def pytest_collection_modifyitems(config, items):
    """Mark every collected item carrying the ``slow`` keyword as skipped,
    unless ``--runslow`` was given, in which case nothing is changed."""
    if not config.getoption("--runslow"):
        # One shared skip marker is attached to each slow item
        slow_skip = pytest.mark.skip(reason="need --runslow option to run")
        for test_item in items:
            if "slow" in test_item.keywords:
                test_item.add_marker(slow_skip)
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`# © 2017 James R. Barlow: github.com/jbarlow83`
Add license notice to all files Source files to GPL3 Exceptions: -tests/spoof/* to MIT -hocrtransform.py -_unicodefun.py Test resources to CC BY-SA 4.0 except when otherwise noted. Add GPL license. 2018-03-14 14:40:48 -07:00			`#`
			`# This file is part of OCRmyPDF.`
			`#`
			`# OCRmyPDF is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# OCRmyPDF is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with OCRmyPDF. If not, see <http://www.gnu.org/licenses/>.`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00
			`import sys`
			`import os`
			`import platform`

			`pytest_plugins = ['helpers_namespace']`

			`import pytest`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`from pathlib import Path`
			`from subprocess import Popen, PIPE`
Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00

Fix several pylint errors and warnings 2018-06-23 00:54:22 -07:00			`# pylint: disable=E1101`
			`# pytest.helpers is dynamic so it confuses pylint`

Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`if sys.version_info.major < 3:`
			`print("Requires Python 3.4+")`
			`sys.exit(1)`


			`@pytest.helpers.register`
			`def is_linux():`
			`return platform.system() == 'Linux'`


Improvements to macOS test and work on homebrew tap autobrew Squashed commits: [3f06c1e] Try setting up homebrew tap autobuilding [01532f1] Strict mode error in brew 2017-03-02 22:27:06 -08:00			`@pytest.helpers.register`
			`def is_macos():`
			`return platform.system() == 'Darwin'`


Move duplicate test code into common namespace 2017-01-26 13:22:01 -08:00			`@pytest.helpers.register`
			`def running_in_docker():`
Fix running_in_docker() check failing on newer Docker This test has to work to ensure spoof/tesseract_cache.py has a writable directory to put cache into. Otherwise those tests fail. 2017-02-13 02:16:06 -08:00			`# Docker creates a file named /.dockerenv (newer versions) or`
			`# /.dockerinit (older) -- this is undocumented, not an offical test`
			`return os.path.exists('/.dockerenv') or os.path.exists('/.dockerinit')`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

Improvements to macOS test and work on homebrew tap autobrew Squashed commits: [3f06c1e] Try setting up homebrew tap autobuilding [01532f1] Strict mode error in brew 2017-03-02 22:27:06 -08:00			`@pytest.helpers.register`
			`def running_in_travis():`
			`return os.environ.get('TRAVIS') == 'true'`


Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))`
			`SPOOF_PATH = os.path.join(TESTS_ROOT, 'spoof')`
			`PROJECT_ROOT = os.path.dirname(TESTS_ROOT)`
			`OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']`


			`@pytest.helpers.register`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`def spoof(tmpdir_factory, **kwargs):`
			`"""Modify PATH to override subprocess executables`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
			`spoof(program1='replacement', ...)`

Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`Creates temporary directory with symlinks to targets.`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
			`"""`
			`env = os.environ.copy()`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`slug = '-'.join(v.replace('.py', '') for v in sorted(kwargs.values()))`
conftest: py3.5 path issue 2018-03-25 00:52:45 -07:00			`spoofer_base = Path(str(tmpdir_factory.mktemp('spoofers')))`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`tmpdir = spoofer_base / slug`
			`tmpdir.mkdir(parents=True)`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00
			`for replace_program, with_spoof in kwargs.items():`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`spoofer = Path(SPOOF_PATH) / with_spoof`
			`spoofer.chmod(0o755)`
			`(tmpdir / replace_program).symlink_to(spoofer)`

			`env['_OCRMYPDF_SAVE_PATH'] = env['PATH']`
			`env['PATH'] = str(tmpdir) + ":" + env['PATH']`
Fix several pylint errors and warnings 2018-06-23 00:54:22 -07:00
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`return env`


Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`@pytest.fixture(scope='session')`
			`def spoof_tesseract_noop(tmpdir_factory):`
			`return spoof(tmpdir_factory, tesseract='tesseract_noop.py')`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00

Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`@pytest.fixture(scope='session')`
			`def spoof_tesseract_cache(tmpdir_factory):`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00			`if running_in_docker():`
			`return os.environ.copy()`
Remove the OCRMYPDF_program environment variables Really, this was just replicating the functionality of the PATH environment variable, and users probably do that anyway. 2018-03-24 15:07:02 -07:00			`return spoof(tmpdir_factory, tesseract="tesseract_cache.py")`
Refactor common test fixtures 2017-05-29 12:47:55 -07:00

Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`@pytest.fixture`
			`def resources():`
			`return Path(TESTS_ROOT) / 'resources'`


			`@pytest.fixture`
			`def ocrmypdf_exec():`
			`return OCRMYPDF`


			`@pytest.fixture(scope="function")`
			`def outdir(tmpdir):`
(Hopefully) Fix Path <-> py.path conversion on Py3.4/3.5 2017-01-26 17:19:15 -08:00			`return Path(str(tmpdir))`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.fixture(scope="function")`
			`def outpdf(tmpdir):`
(Hopefully) Fix Path <-> py.path conversion on Py3.4/3.5 2017-01-26 17:19:15 -08:00			`return str(Path(str(tmpdir)) / 'out.pdf')`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.fixture(scope="function")`
			`def no_outpdf(tmpdir):`
			`"""This just documents the fact that a test is not expected to produce`
			`output. Unfortunately an assertion failure inside a test fixture produces`
			`an error rather than a test failure, so no testing is done. It's up to`
			`the test to confirm that no output file was created."""`
(Hopefully) Fix Path <-> py.path conversion on Py3.4/3.5 2017-01-26 17:19:15 -08:00			`return str(Path(str(tmpdir)) / 'no_output.pdf')`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00

			`@pytest.helpers.register`
			`def check_ocrmypdf(input_file, output_file, *args, env=None):`
			`"Run ocrmypdf and confirmed that a valid file was created"`

			`p, out, err = run_ocrmypdf(input_file, output_file, *args, env=env)`
Test: send stderr to stderr, why don't we? 2018-10-03 14:23:34 -07:00			`# ensure py.test collects the output, use -s to view`
			`print(err, file=sys.stderr)`
spoof: Allow tesseract cache to share cache Previous incarnation was only suitable for generating a local cache where the suite was executed repeatedly. Now the cache ignores differences, so it can be checked into Github and shared. 2018-03-24 22:17:36 -07:00			`assert p.returncode == 0`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`assert os.path.exists(str(output_file)), "Output file not created"`
			`assert os.stat(str(output_file)).st_size > 100, "PDF too small or empty"`
			`assert out == "", \`
			`"The following was written to stdout and should not have been: \n" + \`
			`"<stdout>\n" + out + "\n</stdout>"`
			`return output_file`


			`@pytest.helpers.register`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00			`def run_ocrmypdf(input_file, output_file, *args, env=None,`
			`universal_newlines=True):`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`"Run ocrmypdf and let caller deal with results"`

			`if env is None:`
			`env = os.environ`

Tests: accept rich path objects without having to str() everything 2017-07-21 16:39:22 -07:00			`p_args = OCRMYPDF + [str(arg) for arg in args] + \`
			`[str(input_file), str(output_file)]`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`p = Popen(`
			`p_args, close_fds=True, stdout=PIPE, stderr=PIPE,`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00			`universal_newlines=universal_newlines, env=env)`
Refactor test suite to use fixtures to manage paths 2017-01-26 16:38:59 -08:00			`out, err = p.communicate()`
			`#print(err)`

			`return p, out, err`
Fix issue #147: unpaper loses DPI information, affects —pdf-renderer tess4 2017-03-24 13:23:03 -07:00

			`@pytest.helpers.register`
			`def first_page_dimensions(pdf):`
Rename pageinfo to pdfinfo 2017-05-19 15:48:23 -07:00			`from ocrmypdf import pdfinfo`
			`info = pdfinfo.PdfInfo(pdf)`
Fix issue #147: unpaper loses DPI information, affects —pdf-renderer tess4 2017-03-24 13:23:03 -07:00			`page0 = info[0]`
pdfinfo: replace most remaining dict-style access 2017-05-19 16:17:36 -07:00			`return (page0.width_inches, page0.height_inches)`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00

			`def pytest_addoption(parser):`
			`parser.addoption(`
Explain pytest --runslow 2018-08-03 00:57:59 -07:00			`"--runslow", action="store_true", default=False,`
			`help=("run slow tests only useful for development (unlikely to be "`
			`"useful for downstream packagers)")`
Add intensive (optional) rotation test 2018-08-03 00:42:59 -07:00			`)`


			`def pytest_collection_modifyitems(config, items):`
			`if config.getoption("--runslow"):`
			`# --runslow given in cli: do not skip slow tests`
			`return`
			`skip_slow = pytest.mark.skip(reason="need --runslow option to run")`
			`for item in items:`
			`if "slow" in item.keywords:`
			`item.add_marker(skip_slow)`