mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-10 15:37:58 +00:00
test: Add test to ensure languages trickle down to ocr (#1857)
Closes [#93](https://github.com/Unstructured-IO/unstructured-inference/issues/93). Adds a test to ensure the language parameter is passed all the way from `partition_pdf` down to the OCR calls.

#### Testing

CI should pass.
This commit is contained in:
parent
b530e0a2be
commit
44cef80c82
@ -1057,3 +1057,35 @@ def test_partition_model_name_default_to_None():
|
|||||||
)
|
)
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
pytest.fail("partition_pdf() raised AttributeError unexpectedly!")
|
pytest.fail("partition_pdf() raised AttributeError unexpectedly!")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("strategy", "ocr_func"),
    [
        ("hi_res", "unstructured_pytesseract.image_to_data"),
        ("ocr_only", "unstructured_pytesseract.run_and_get_multiple_output"),
    ],
)
def test_ocr_language_passes_through(strategy, ocr_func):
    """Check that `ocr_languages` given to `partition_pdf` reaches the OCR call as `lang`.

    For each parametrized strategy, the OCR function named by `ocr_func` is
    replaced with a mock that raises as soon as it is invoked. The keyword
    arguments recorded by that mock are then inspected for `lang="kor"`.
    """

    class CallException(Exception):
        """Sentinel raised by the OCR stand-in so partitioning stops right after the call."""

    # The stand-in records how it was invoked and then aborts execution by raising.
    ocr_spy = mock.Mock(side_effect=CallException("Function called!"))
    ocr_patch = mock.patch(ocr_func, ocr_spy)

    # The sentinel must propagate out of partition_pdf for the test to proceed.
    with ocr_patch, pytest.raises(CallException):
        pdf.partition_pdf(
            "example-docs/layout-parser-paper-fast.pdf",
            strategy=strategy,
            ocr_languages="kor",
        )

    # The requested language must have been forwarded as the `lang` keyword.
    passed_kwargs = ocr_spy.call_args.kwargs
    assert "lang" in passed_kwargs
    assert passed_kwargs["lang"] == "kor"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user