mirror of
				https://github.com/Unstructured-IO/unstructured.git
				synced 2025-11-03 19:43:24 +00:00 
			
		
		
		
	The purpose of this PR is to refactor the OCR-related modules so that they import fewer modules unnecessarily, thereby avoiding potential issues (most likely caused by circular imports). ### Summary - add an `inference_utils` module (unstructured/partition/pdf_image/inference_utils.py) that defines utility functions related to the unstructured-inference library, which reduces the need to import unstructured-inference library functions in other files - add `conftest.py` in the `test_unstructured/partition/pdf_image/` directory to define fixtures that are available to all tests in that directory and its subdirectories ### Testing CI should pass
		
			
				
	
	
		
			79 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			79 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import pytest
 | 
						|
from unstructured_inference.inference.elements import EmbeddedTextRegion
 | 
						|
 | 
						|
 | 
						|
@pytest.fixture()
def mock_embedded_text_regions():
    """Provide a fixed list of ten ``EmbeddedTextRegion`` objects for tests.

    Each region is created with ``EmbeddedTextRegion.from_coords`` from a
    hard-coded bounding box (``x1``, ``y1``, ``x2``, ``y2``) and a single
    word of text. In order, the words are: "LayoutParser:", "A", "Unified",
    "Toolkit", "for", "Deep", "Learning", "Based", "Document", "Image".

    The first six regions share one ``y1`` value and the last four share
    another (presumably two consecutive text lines of a document title;
    verify against the tests that consume this fixture).
    """
    # One row per region: (x1, y1, x2, y2, text). Values are copied verbatim
    # so that the resulting regions are identical to the expected fixtures.
    region_specs = (
        (453.00277777777774, 317.319341111111,
         711.5338541666665, 358.28571222222206, "LayoutParser:"),
        (726.4778125, 317.319341111111,
         760.3308594444444, 357.1698966666667, "A"),
        (775.2748177777777, 317.319341111111,
         917.3579885555555, 357.1698966666667, "Unified"),
        (932.3019468888888, 317.319341111111,
         1071.8426522222221, 357.1698966666667, "Toolkit"),
        (1086.7866105555556, 317.319341111111,
         1141.2105142777777, 357.1698966666667, "for"),
        (1156.154472611111, 317.319341111111,
         1256.334784222222, 357.1698966666667, "Deep"),
        (437.83888888888885, 367.13322999999986,
         610.0171992222222, 406.9837855555556, "Learning"),
        (624.9611575555555, 367.13322999999986,
         741.6754646666665, 406.9837855555556, "Based"),
        (756.619423, 367.13322999999986,
         958.3867708333332, 406.9837855555556, "Document"),
        (973.3307291666665, 367.13322999999986,
         1092.0535042777776, 406.9837855555556, "Image"),
    )
    return [
        EmbeddedTextRegion.from_coords(
            x1=left,
            y1=top,
            x2=right,
            y2=bottom,
            text=word,
        )
        for left, top, right, bottom, word in region_specs
    ]
 |