mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-11-04 12:03:15 +00:00 
			
		
		
		
	test: Add test to ensure languages trickle down to ocr (#1857)
Closes [#93](https://github.com/Unstructured-IO/unstructured-inference/issues/93). Adds a test to ensure that language parameters are passed all the way from `partition_pdf` down to the OCR calls.

#### Testing

CI should pass.
This commit is contained in:
		
							parent
							
								
									b530e0a2be
								
							
						
					
					
						commit
						44cef80c82
					
				@ -1057,3 +1057,35 @@ def test_partition_model_name_default_to_None():
 | 
			
		||||
        )
 | 
			
		||||
    except AttributeError:
 | 
			
		||||
        pytest.fail("partition_pdf() raised AttributeError unexpectedly!")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@pytest.mark.parametrize(
    ("strategy", "ocr_func"),
    [
        ("hi_res", "unstructured_pytesseract.image_to_data"),
        ("ocr_only", "unstructured_pytesseract.run_and_get_multiple_output"),
    ],
)
def test_ocr_language_passes_through(strategy, ocr_func):
    """Check that `ocr_languages` given to `partition_pdf` reaches the OCR call as `lang`.

    Each parametrized case pairs a partitioning strategy with the dotted path of
    the OCR function that is patched for that strategy. The patched function
    records how it was called and then raises immediately, so partitioning stops
    as soon as OCR is reached.
    """

    class OcrReached(Exception):
        """Sentinel raised by the patched OCR function to halt partitioning."""

    # The stand-in both captures the call arguments and aborts execution.
    ocr_spy = mock.Mock(side_effect=OcrReached("Function called!"))

    # Swap the real OCR function for the spy; the sentinel must propagate out
    # of partition_pdf for the test to proceed.
    with mock.patch(ocr_func, new=ocr_spy), pytest.raises(OcrReached):
        pdf.partition_pdf(
            "example-docs/layout-parser-paper-fast.pdf",
            strategy=strategy,
            ocr_languages="kor",
        )

    # The language must arrive as the `lang` keyword argument, unchanged.
    passed_kwargs = ocr_spy.call_args.kwargs
    assert "lang" in passed_kwargs
    assert passed_kwargs["lang"] == "kor"
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user