tests: improve typing and remove some legacy code

2025-08-18 13:42:12 +00:00 · 2021-12-05 23:32:38 -08:00 · 2021-12-05 23:32:38 -08:00 · 8fdcb15b4e
commit 8fdcb15b4e
parent 0323738ada
2 changed files with 47 additions and 29 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -75,6 +75,6 @@ filterwarnings = ["ignore:.*XMLParser.*:DeprecationWarning"]
 [[tool.mypy.overrides]]
 module = [
-  'pluggy', 'tqdm', 'coloredlogs', 'img2pdf', 'pdfminer.*', 'reportlab.*'
+  'pluggy', 'tqdm', 'coloredlogs', 'img2pdf', 'pdfminer.*', 'reportlab.*', 'fitz', 'libxmp.utils'
 ]
 ignore_missing_imports = true
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -9,17 +9,15 @@ import os
 import platform
 import sys
 from pathlib import Path
-from subprocess import PIPE, run
+from subprocess import PIPE, CompletedProcess, run
 from typing import AnyStr, List, Literal, Tuple, overload
 import pytest
 from ocrmypdf import api, pdfinfo
 from ocrmypdf._exec import unpaper
 from ocrmypdf._plugin_manager import get_parser_options_plugins
-
+from ocrmypdf.exceptions import ExitCode
 if sys.version_info < (3, 5):
    print("Requires Python 3.5+")
    sys.exit(1)
 def is_linux():
@ -46,36 +44,35 @@ def have_unpaper():
 TESTS_ROOT = Path(__file__).parent.resolve()
 PROJECT_ROOT = TESTS_ROOT
 OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']
@pytest.fixture
-def resources():
+def resources() -> Path:
    return Path(TESTS_ROOT) / 'resources'
@pytest.fixture
-def ocrmypdf_exec():
+def ocrmypdf_exec() -> List[str]:
-    return OCRMYPDF
+    return [sys.executable, '-m', 'ocrmypdf']
@pytest.fixture(scope="function")
-def outdir(tmp_path):
+def outdir(tmp_path) -> Path:
    return tmp_path
@pytest.fixture(scope="function")
-def outpdf(tmp_path):
+def outpdf(tmp_path) -> Path:
    return tmp_path / 'out.pdf'
@pytest.fixture(scope="function")
-def outtxt(tmp_path):
+def outtxt(tmp_path) -> Path:
    return tmp_path / 'out.txt'
@pytest.fixture(scope="function")
-def no_outpdf(tmp_path):
+def no_outpdf(tmp_path) -> Path:
    """This just documents the fact that a test is not expected to produce
    output. Unfortunately an assertion failure inside a test fixture produces
    an error rather than a test failure, so no testing is done. It's up to
@ -83,13 +80,13 @@ def no_outpdf(tmp_path):
    return tmp_path / 'no_output.pdf'
-def check_ocrmypdf(input_file, output_file, *args):
+def check_ocrmypdf(input_file: Path, output_file: Path, *args) -> Path:
-    """Run ocrmypdf and confirmed that a valid file was created"""
+    """Run ocrmypdf and confirm that a valid plausible PDF was created."""
-    args = [str(input_file), str(output_file)] + [
+    api_args = [str(input_file), str(output_file)] + [
        str(arg) for arg in args if arg is not None
    ]
-    _parser, options, plugin_manager = get_parser_options_plugins(args=args)
+    _parser, options, plugin_manager = get_parser_options_plugins(args=api_args)
    api.check_options(options, plugin_manager)
    result = api.run_pipeline(options, plugin_manager=plugin_manager, api=True)
@ -100,44 +97,65 @@ def check_ocrmypdf(input_file, output_file, *args):
    return output_file
-def run_ocrmypdf_api(input_file, output_file, *args):
+def run_ocrmypdf_api(input_file: Path, output_file: Path, *args) -> ExitCode:
-    """Run ocrmypdf via API and let caller deal with results
+    """Run ocrmypdf via its API in-process, and let test deal with results.
-    Does not currently have a way to manipulate the PATH except for Tesseract.
+    This simulates calling the command line interface in a subprocess, but
    is easier for debuggers and code coverage to follow.
    Any exception raised will be trapped and converted to an exit code.
    The return code must always be checked or the test may declare a failure
    to be pass.
    """
-    args = [str(input_file), str(output_file)] + [
+    api_args = [str(input_file), str(output_file)] + [
        str(arg) for arg in args if arg is not None
    ]
-    _parser, options, plugin_manager = get_parser_options_plugins(args=args)
+    _parser, options, plugin_manager = get_parser_options_plugins(args=api_args)
    api.check_options(options, plugin_manager)
    return api.run_pipeline(options, plugin_manager=None, api=False)
-def run_ocrmypdf(input_file, output_file, *args, text=True):
+@overload
-    "Run ocrmypdf and let caller deal with results"
+def run_ocrmypdf(
    input_file: Path, output_file: Path, *args, text: Literal[True] = True
 ) -> Tuple[CompletedProcess, str, str]:
    ...
@overload
 def run_ocrmypdf(
    input_file: Path, output_file: Path, *args, text: Literal[False]
 ) -> Tuple[CompletedProcess, bytes, bytes]:
    ...
 def run_ocrmypdf(input_file: Path, output_file: Path, *args, text: bool = True):
    """Run ocrmypdf in a subprocess and let test deal with results.
    If an exception is thrown this fact will be returned as part of the result
    text and return code rather than exception objects.
    """
    p_args = (
-        OCRMYPDF
+        [sys.executable, '-m', 'ocrmypdf']
        + [str(arg) for arg in args if arg is not None]
        + [str(input_file), str(output_file)]
    )
    env = os.environ.copy()
    p = run(
        p_args,
        stdout=PIPE,
        stderr=PIPE,
        text=text,
        env=env,
        check=False,
    )
    # print(p.stderr)
    return p, p.stdout, p.stderr
-def first_page_dimensions(pdf):
+def first_page_dimensions(pdf: Path):
    info = pdfinfo.PdfInfo(pdf)
    page0 = info[0]
    return (page0.width_inches, page0.height_inches)