test: Replace many instances of run_ocrmypdf in subprocess with inline

2025-12-30 00:31:59 +00:00 · 2019-11-28 14:03:18 -08:00 · 2019-11-28 14:03:18 -08:00 · fde550f9a7
commit fde550f9a7
parent a3726e4ce3
4 changed files with 57 additions and 37 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -193,6 +193,22 @@ def check_ocrmypdf(input_file, output_file, *args, env=None):
    return output_file


+@pytest.helpers.register
+def run_ocrmypdf_api(input_file, output_file, *args, env=None):
+    "Run ocrmypdf and let caller deal with results"
+
+    options = cli.parser.parse_args(
+        [str(input_file), str(output_file)]
+        + [str(arg) for arg in args if arg is not None]
+    )
+    api.check_options(options)
+    if env:
+        options.tesseract_env = env
+        options.tesseract_env['_OCRMYPDF_TEST_INFILE'] = os.fspath(input_file)
+
+    return api.run_pipeline(options, api=False)
+
+
@pytest.helpers.register
 def run_ocrmypdf(input_file, output_file, *args, env=None, universal_newlines=True):
    "Run ocrmypdf and let caller deal with results"
--- a/tests/test_main.py
+++ b/tests/test_main.py
@ -39,6 +39,7 @@ from ocrmypdf.pdfinfo import Colorspace, Encoding, PdfInfo

 check_ocrmypdf = pytest.helpers.check_ocrmypdf
 run_ocrmypdf = pytest.helpers.run_ocrmypdf
+run_ocrmypdf_api = pytest.helpers.run_ocrmypdf_api
 spoof = pytest.helpers.spoof


@ -197,8 +198,8 @@ def test_oversample(spoof_tesseract_cache, renderer, resources, outpdf):


 def test_repeat_ocr(resources, no_outpdf):
-    p, _, _ = run_ocrmypdf(resources / 'graph_ocred.pdf', no_outpdf)
-    assert p.returncode != 0
+    result = run_ocrmypdf_api(resources / 'graph_ocred.pdf', no_outpdf)
+    assert result == ExitCode.already_done_ocr


 def test_force_ocr(spoof_tesseract_cache, resources, outpdf):
@ -300,34 +301,34 @@ def test_maximum_options(
    )


-def test_tesseract_missing_tessdata(resources, no_outpdf):
+def test_tesseract_missing_tessdata(resources, no_outpdf, tmpdir):
    env = os.environ.copy()
-    env['TESSDATA_PREFIX'] = '/tmp'
+    env['TESSDATA_PREFIX'] = tmpdir

-    p, _, err = run_ocrmypdf(
-        resources / 'graph_ocred.pdf', no_outpdf, '-v', '1', '--skip-text', env=env
+    returncode = run_ocrmypdf_api(
+        resources / 'graph.pdf', no_outpdf, '-v', '1', '--skip-text', env=env
    )
-    assert p.returncode == ExitCode.missing_dependency, err
+    assert returncode == ExitCode.missing_dependency


 def test_invalid_input_pdf(resources, no_outpdf):
-    p, out, err = run_ocrmypdf(resources / 'invalid.pdf', no_outpdf)
-    assert p.returncode == ExitCode.input_file, err
+    result = run_ocrmypdf_api(resources / 'invalid.pdf', no_outpdf)
+    assert result == ExitCode.input_file


 def test_blank_input_pdf(resources, outpdf):
-    p, out, err = run_ocrmypdf(resources / 'blank.pdf', outpdf)
-    assert p.returncode == ExitCode.ok
+    result = run_ocrmypdf_api(resources / 'blank.pdf', outpdf)
+    assert result == ExitCode.ok


 def test_force_ocr_on_pdf_with_no_images(spoof_tesseract_crash, resources, no_outpdf):
    # As a correctness test, make sure that --force-ocr on a PDF with no
    # content still triggers tesseract. If tesseract crashes, then it was
    # called.
-    p, _, err = run_ocrmypdf(
+    result = run_ocrmypdf_api(
        resources / 'blank.pdf', no_outpdf, '--force-ocr', env=spoof_tesseract_crash
    )
-    assert p.returncode == ExitCode.child_process_error, err
+    assert result == ExitCode.child_process_error
    assert not os.path.exists(no_outpdf)


@ -340,7 +341,7 @@ def test_german(spoof_tesseract_cache, resources, outdir):
    # properly. It is fine that we are testing -l deu on a French file because
    # we are exercising the functionality not going for accuracy.
    sidecar = outdir / 'francais.txt'
-    p, out, err = run_ocrmypdf(
+    result = run_ocrmypdf_api(
        resources / 'francais.pdf',
        outdir / 'francais.pdf',
        '-l',
@ -351,16 +352,16 @@ def test_german(spoof_tesseract_cache, resources, outdir):
    )
    if 'deu' not in tesseract.languages():
        pytest.xfail(reason="tesseract-deu language pack not installed")
-    assert p.returncode == ExitCode.ok, "Requires tesseract deu language pack"
+    assert result == ExitCode.ok, "Requires tesseract deu language pack"


 def test_klingon(resources, outpdf):
-    p, out, err = run_ocrmypdf(resources / 'francais.pdf', outpdf, '-l', 'klz')
+    p, _, _ = run_ocrmypdf(resources / 'francais.pdf', outpdf, '-l', 'klz')
    assert p.returncode == ExitCode.missing_dependency


 def test_missing_docinfo(spoof_tesseract_noop, resources, outpdf):
-    p, out, err = run_ocrmypdf(
+    result = run_ocrmypdf_api(
        resources / 'missing_docinfo.pdf',
        outpdf,
        '-l',
@ -368,7 +369,7 @@ def test_missing_docinfo(spoof_tesseract_noop, resources, outpdf):
        '--skip-text',
        env=spoof_tesseract_noop,
    )
-    assert p.returncode == ExitCode.ok, err
+    assert result == ExitCode.ok


 def test_uppercase_extension(spoof_tesseract_noop, resources, outdir):
@ -379,24 +380,24 @@ def test_uppercase_extension(spoof_tesseract_noop, resources, outdir):
    )


-def test_input_file_not_found(no_outpdf):
+def test_input_file_not_found(caplog, no_outpdf):
    input_file = "does not exist.pdf"
-    p, out, err = run_ocrmypdf(input_file, no_outpdf)
-    assert p.returncode == ExitCode.input_file
-    assert input_file in out or input_file in err
+    result = run_ocrmypdf_api(input_file, no_outpdf)
+    assert result == ExitCode.input_file
+    assert input_file in caplog.text


-def test_input_file_not_a_pdf(no_outpdf):
+def test_input_file_not_a_pdf(caplog, no_outpdf):
    input_file = __file__  # Try to OCR this file
-    p, out, err = run_ocrmypdf(input_file, no_outpdf)
-    assert p.returncode == ExitCode.input_file
-    assert input_file in out or input_file in err
+    result = run_ocrmypdf_api(input_file, no_outpdf)
+    assert result == ExitCode.input_file
+    assert input_file in caplog.text


-def test_encrypted(resources, no_outpdf):
-    p, out, err = run_ocrmypdf(resources / 'skew-encrypted.pdf', no_outpdf)
-    assert p.returncode == ExitCode.encrypted_pdf
-    assert out.find('encrypted')
+def test_encrypted(resources, caplog, no_outpdf):
+    result = run_ocrmypdf_api(resources / 'skew-encrypted.pdf', no_outpdf)
+    assert result == ExitCode.encrypted_pdf
+    assert 'encryption must be removed' in caplog.text


@pytest.mark.parametrize('renderer', RENDERERS)
@ -415,8 +416,8 @@ def test_pagesegmode(renderer, spoof_tesseract_cache, resources, outpdf):


@pytest.mark.parametrize('renderer', RENDERERS)
-def test_tesseract_crash(renderer, spoof_tesseract_crash, resources, no_outpdf):
-    p, out, err = run_ocrmypdf(
+def test_tesseract_crash(renderer, spoof_tesseract_crash, resources, no_outpdf, caplog):
+    result = run_ocrmypdf_api(
        resources / 'ccitt.pdf',
        no_outpdf,
        '-v',
@ -425,9 +426,9 @@ def test_tesseract_crash(renderer, spoof_tesseract_crash, resources, no_outpdf):
        renderer,
        env=spoof_tesseract_crash,
    )
-    assert p.returncode == ExitCode.child_process_error
+    assert result == ExitCode.child_process_error
    assert not os.path.exists(no_outpdf)
-    assert "ERROR" in err
+    assert "SubprocessOutputError" in caplog.text


 def test_tesseract_crash_autorotate(spoof_tesseract_crash, resources, no_outpdf):
--- a/tests/test_stdio.py
+++ b/tests/test_stdio.py
@ -29,6 +29,7 @@ from ocrmypdf.exec import qpdf
 # pylint: disable=no-member,redefined-outer-name

 run_ocrmypdf = pytest.helpers.run_ocrmypdf
+run_ocrmypdf_api = pytest.helpers.run_ocrmypdf
 spoof = pytest.helpers.spoof


--- a/tests/test_userunit.py
+++ b/tests/test_userunit.py
@ -24,6 +24,7 @@ from ocrmypdf.pdfinfo import PdfInfo

 check_ocrmypdf = pytest.helpers.check_ocrmypdf
 run_ocrmypdf = pytest.helpers.run_ocrmypdf
+run_ocrmypdf_api = pytest.helpers.run_ocrmypdf_api
 spoof = pytest.helpers.spoof


@ -32,9 +33,10 @@ def poster(resources):
    return resources / 'poster.pdf'


-def test_userunit_ghostscript_fails(poster, no_outpdf):
-    p, out, err = run_ocrmypdf(poster, no_outpdf, '--output-type=pdfa')
-    assert p.returncode == ExitCode.input_file
+def test_userunit_ghostscript_fails(poster, no_outpdf, caplog):
+    result = run_ocrmypdf_api(poster, no_outpdf, '--output-type=pdfa')
+    assert result == ExitCode.input_file
+    assert 'not supported by Ghostscript' in caplog.text


 def test_userunit_qpdf_passes(spoof_tesseract_cache, poster, outpdf):