2019-12-30 17:51:09 -08:00
|
|
|
# © 2019 James R. Barlow: github.com/jbarlow83
|
|
|
|
#
|
2020-08-05 00:44:42 -07:00
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
|
2019-12-30 17:51:09 -08:00
|
|
|
|
2022-07-23 00:39:24 -07:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2019-12-30 17:51:09 -08:00
|
|
|
import logging
|
2020-04-30 03:38:27 -07:00
|
|
|
from io import BytesIO, StringIO
|
2019-12-30 17:51:09 -08:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
import ocrmypdf
|
|
|
|
|
|
|
|
|
|
|
|
def test_raw_console():
    """Check that text written to a TqdmConsole reaches its backing stream."""
    sink = StringIO()
    console = ocrmypdf.api.TqdmConsole(file=sink)

    console.write("Test")
    console.flush()

    captured = sink.getvalue()
    assert "Test" in captured
|
|
|
|
|
|
|
|
|
|
|
|
def test_tqdm_console():
    """Check where log messages land relative to a tqdm progress bar.

    A ``logging.StreamHandler`` wrapping ``ocrmypdf.api.TqdmConsole`` is
    attached to the root logger, with both the handler and the progress bar
    writing to the same in-memory buffer. A message logged while the bar is
    active must appear in the buffer before the bar's first filled segment;
    a message logged afterwards must not.

    The root logger is global state shared by every test in the session, so
    the handler is removed and the previous level restored when the test
    finishes, whether it passes or fails.
    """
    log = logging.getLogger()
    previous_level = log.level
    log.setLevel(logging.INFO)

    bio = StringIO()
    console = logging.StreamHandler(ocrmypdf.api.TqdmConsole(file=bio))
    console.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(console)

    def before_pbar(message):
        # Ensure that log messages appear before the progress bar, even when
        # printed after the progress bar updates.
        v = bio.getvalue()
        pbar_start_marker = '|#'
        # str.index raises ValueError if either substring is missing, so a
        # missing message or bar fails the test loudly instead of passing.
        return v.index(message) < v.index(pbar_start_marker)

    try:
        with tqdm(total=2, file=bio, disable=False) as pbar:
            pbar.update()
            msg = "1/2 above progress bar"
            log.info(msg)
            assert before_pbar(msg)

        # This message is expected to land after the bar's output.
        log.info("done")
        assert not before_pbar("done")
    finally:
        # Undo the global logging changes so later tests are unaffected.
        log.removeHandler(console)
        log.setLevel(previous_level)
|
2020-04-14 23:18:52 -07:00
|
|
|
|
|
|
|
|
|
|
|
def test_language_list():
    """Call ``ocrmypdf.ocr`` with ``language`` given as a list.

    The input file does not exist, so the call is expected to raise either
    ``InputFileError`` or ``MissingDependencyError``.
    """
    expected_errors = (
        ocrmypdf.exceptions.InputFileError,
        ocrmypdf.exceptions.MissingDependencyError,
    )
    requested_languages = ['eng', 'deu']

    with pytest.raises(expected_errors):
        ocrmypdf.ocr('doesnotexist.pdf', '_.pdf', language=requested_languages)
|
2020-04-30 03:38:27 -07:00
|
|
|
|
|
|
|
|
|
|
|
def test_stream_api(resources):
    """Run ``ocrmypdf.ocr`` with file-like objects for both input and output.

    Args:
        resources: Fixture supporting ``/`` path joining and ``.open()``
            (presumably a ``pathlib.Path`` to the test resources directory
            -- confirm against conftest); it must contain ``graph.pdf``.

    The input is opened in a ``with`` block so the file handle is always
    closed, including when ``ocrmypdf.ocr`` raises.
    """
    out = BytesIO()

    with (resources / 'graph.pdf').open('rb') as in_:
        ocrmypdf.ocr(in_, out, tesseract_timeout=0.0)

    # Rewind before reading: the check below inspects the start of the output.
    out.seek(0)
    assert b'%PDF' in out.read(1024)
|