mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-11-02 19:13:13 +00:00
test: Add test to ensure languages trickle down to ocr (#1857)
Closes [#93](https://github.com/Unstructured-IO/unstructured-inference/issues/93). Adds a test to ensure language parameters are passed all the way from `partition_pdf` down to the OCR calls.

#### Testing

CI should pass.
This commit is contained in:
parent
b530e0a2be
commit
44cef80c82
@ -1057,3 +1057,35 @@ def test_partition_model_name_default_to_None():
|
||||
)
|
||||
except AttributeError:
|
||||
pytest.fail("partition_pdf() raised AttributeError unexpectedly!")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("strategy", "ocr_func"),
    [
        ("hi_res", "unstructured_pytesseract.image_to_data"),
        ("ocr_only", "unstructured_pytesseract.run_and_get_multiple_output"),
    ],
)
def test_ocr_language_passes_through(strategy, ocr_func):
    """Verify `ocr_languages` given to `partition_pdf` reaches the OCR call as `lang`.

    The OCR callable named by `ocr_func` is swapped for a mock that raises as soon as it
    is invoked. That both records the keyword arguments of the call and aborts the
    partition right there, so no OCR work is actually performed.
    """

    class _OcrReached(Exception):
        """Sentinel raised by the mocked OCR callable to halt partitioning."""

    ocr_spy = mock.Mock(side_effect=_OcrReached("Function called!"))

    # Replace the real OCR callable for the duration of the partition call and require
    # that the sentinel propagates out, proving the patched function was invoked.
    with mock.patch(ocr_func, ocr_spy):
        with pytest.raises(_OcrReached):
            pdf.partition_pdf(
                "example-docs/layout-parser-paper-fast.pdf",
                strategy=strategy,
                ocr_languages="kor",
            )

    # Inspect the keyword arguments of the recorded call for the language code.
    passed_kwargs = ocr_spy.call_args.kwargs
    assert "lang" in passed_kwargs
    assert passed_kwargs["lang"] == "kor"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user