mirror of
https://github.com/ocrmypdf/OCRmyPDF.git
synced 2025-08-18 05:31:38 +00:00
tests: improve typing and remove some legacy code
This commit is contained in:
parent
0323738ada
commit
8fdcb15b4e
@ -75,6 +75,6 @@ filterwarnings = ["ignore:.*XMLParser.*:DeprecationWarning"]
|
|||||||
|
|
||||||
[[tool.mypy.overrides]]
|
[[tool.mypy.overrides]]
|
||||||
module = [
|
module = [
|
||||||
'pluggy', 'tqdm', 'coloredlogs', 'img2pdf', 'pdfminer.*', 'reportlab.*'
|
'pluggy', 'tqdm', 'coloredlogs', 'img2pdf', 'pdfminer.*', 'reportlab.*', 'fitz', 'libxmp.utils'
|
||||||
]
|
]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
|
@ -9,17 +9,15 @@ import os
|
|||||||
import platform
|
import platform
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from subprocess import PIPE, run
|
from subprocess import PIPE, CompletedProcess, run
|
||||||
|
from typing import AnyStr, List, Literal, Tuple, overload
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ocrmypdf import api, pdfinfo
|
from ocrmypdf import api, pdfinfo
|
||||||
from ocrmypdf._exec import unpaper
|
from ocrmypdf._exec import unpaper
|
||||||
from ocrmypdf._plugin_manager import get_parser_options_plugins
|
from ocrmypdf._plugin_manager import get_parser_options_plugins
|
||||||
|
from ocrmypdf.exceptions import ExitCode
|
||||||
if sys.version_info < (3, 5):
|
|
||||||
print("Requires Python 3.5+")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def is_linux():
|
def is_linux():
|
||||||
@ -46,36 +44,35 @@ def have_unpaper():
|
|||||||
|
|
||||||
TESTS_ROOT = Path(__file__).parent.resolve()
|
TESTS_ROOT = Path(__file__).parent.resolve()
|
||||||
PROJECT_ROOT = TESTS_ROOT
|
PROJECT_ROOT = TESTS_ROOT
|
||||||
OCRMYPDF = [sys.executable, '-m', 'ocrmypdf']
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def resources():
|
def resources() -> Path:
|
||||||
return Path(TESTS_ROOT) / 'resources'
|
return Path(TESTS_ROOT) / 'resources'
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def ocrmypdf_exec():
|
def ocrmypdf_exec() -> List[str]:
|
||||||
return OCRMYPDF
|
return [sys.executable, '-m', 'ocrmypdf']
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def outdir(tmp_path):
|
def outdir(tmp_path) -> Path:
|
||||||
return tmp_path
|
return tmp_path
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def outpdf(tmp_path):
|
def outpdf(tmp_path) -> Path:
|
||||||
return tmp_path / 'out.pdf'
|
return tmp_path / 'out.pdf'
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def outtxt(tmp_path):
|
def outtxt(tmp_path) -> Path:
|
||||||
return tmp_path / 'out.txt'
|
return tmp_path / 'out.txt'
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def no_outpdf(tmp_path):
|
def no_outpdf(tmp_path) -> Path:
|
||||||
"""This just documents the fact that a test is not expected to produce
|
"""This just documents the fact that a test is not expected to produce
|
||||||
output. Unfortunately an assertion failure inside a test fixture produces
|
output. Unfortunately an assertion failure inside a test fixture produces
|
||||||
an error rather than a test failure, so no testing is done. It's up to
|
an error rather than a test failure, so no testing is done. It's up to
|
||||||
@ -83,13 +80,13 @@ def no_outpdf(tmp_path):
|
|||||||
return tmp_path / 'no_output.pdf'
|
return tmp_path / 'no_output.pdf'
|
||||||
|
|
||||||
|
|
||||||
def check_ocrmypdf(input_file, output_file, *args):
|
def check_ocrmypdf(input_file: Path, output_file: Path, *args) -> Path:
|
||||||
"""Run ocrmypdf and confirmed that a valid file was created"""
|
"""Run ocrmypdf and confirm that a valid plausible PDF was created."""
|
||||||
args = [str(input_file), str(output_file)] + [
|
api_args = [str(input_file), str(output_file)] + [
|
||||||
str(arg) for arg in args if arg is not None
|
str(arg) for arg in args if arg is not None
|
||||||
]
|
]
|
||||||
|
|
||||||
_parser, options, plugin_manager = get_parser_options_plugins(args=args)
|
_parser, options, plugin_manager = get_parser_options_plugins(args=api_args)
|
||||||
api.check_options(options, plugin_manager)
|
api.check_options(options, plugin_manager)
|
||||||
result = api.run_pipeline(options, plugin_manager=plugin_manager, api=True)
|
result = api.run_pipeline(options, plugin_manager=plugin_manager, api=True)
|
||||||
|
|
||||||
@ -100,44 +97,65 @@ def check_ocrmypdf(input_file, output_file, *args):
|
|||||||
return output_file
|
return output_file
|
||||||
|
|
||||||
|
|
||||||
def run_ocrmypdf_api(input_file, output_file, *args):
|
def run_ocrmypdf_api(input_file: Path, output_file: Path, *args) -> ExitCode:
|
||||||
"""Run ocrmypdf via API and let caller deal with results
|
"""Run ocrmypdf via its API in-process, and let test deal with results.
|
||||||
|
|
||||||
Does not currently have a way to manipulate the PATH except for Tesseract.
|
This simulates calling the command line interface in a subprocess, but
|
||||||
|
is easier for debuggers and code coverage to follow.
|
||||||
|
|
||||||
|
Any exception raised will be trapped and converted to an exit code.
|
||||||
|
The return code must always be checked or the test may declare a failure
|
||||||
|
to be a pass.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
args = [str(input_file), str(output_file)] + [
|
api_args = [str(input_file), str(output_file)] + [
|
||||||
str(arg) for arg in args if arg is not None
|
str(arg) for arg in args if arg is not None
|
||||||
]
|
]
|
||||||
_parser, options, plugin_manager = get_parser_options_plugins(args=args)
|
_parser, options, plugin_manager = get_parser_options_plugins(args=api_args)
|
||||||
|
|
||||||
api.check_options(options, plugin_manager)
|
api.check_options(options, plugin_manager)
|
||||||
return api.run_pipeline(options, plugin_manager=None, api=False)
|
return api.run_pipeline(options, plugin_manager=None, api=False)
|
||||||
|
|
||||||
|
|
||||||
def run_ocrmypdf(input_file, output_file, *args, text=True):
|
@overload
|
||||||
"Run ocrmypdf and let caller deal with results"
|
def run_ocrmypdf(
|
||||||
|
input_file: Path, output_file: Path, *args, text: Literal[True] = True
|
||||||
|
) -> Tuple[CompletedProcess, str, str]:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def run_ocrmypdf(
|
||||||
|
input_file: Path, output_file: Path, *args, text: Literal[False]
|
||||||
|
) -> Tuple[CompletedProcess, bytes, bytes]:
|
||||||
|
...
|
||||||
|
|
||||||
|
|
||||||
|
def run_ocrmypdf(input_file: Path, output_file: Path, *args, text: bool = True):
|
||||||
|
"""Run ocrmypdf in a subprocess and let test deal with results.
|
||||||
|
|
||||||
|
If an exception is thrown this fact will be returned as part of the result
|
||||||
|
text and return code rather than as an exception object.
|
||||||
|
"""
|
||||||
|
|
||||||
p_args = (
|
p_args = (
|
||||||
OCRMYPDF
|
[sys.executable, '-m', 'ocrmypdf']
|
||||||
+ [str(arg) for arg in args if arg is not None]
|
+ [str(arg) for arg in args if arg is not None]
|
||||||
+ [str(input_file), str(output_file)]
|
+ [str(input_file), str(output_file)]
|
||||||
)
|
)
|
||||||
|
|
||||||
env = os.environ.copy()
|
|
||||||
p = run(
|
p = run(
|
||||||
p_args,
|
p_args,
|
||||||
stdout=PIPE,
|
stdout=PIPE,
|
||||||
stderr=PIPE,
|
stderr=PIPE,
|
||||||
text=text,
|
text=text,
|
||||||
env=env,
|
|
||||||
check=False,
|
check=False,
|
||||||
)
|
)
|
||||||
# print(p.stderr)
|
# print(p.stderr)
|
||||||
return p, p.stdout, p.stderr
|
return p, p.stdout, p.stderr
|
||||||
|
|
||||||
|
|
||||||
def first_page_dimensions(pdf):
|
def first_page_dimensions(pdf: Path):
|
||||||
info = pdfinfo.PdfInfo(pdf)
|
info = pdfinfo.PdfInfo(pdf)
|
||||||
page0 = info[0]
|
page0 = info[0]
|
||||||
return (page0.width_inches, page0.height_inches)
|
return (page0.width_inches, page0.height_inches)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user