Christine Straub 29b9ea7ba6
refactor: ocr modules (#2492)
The purpose of this PR is to refactor OCR-related modules to reduce
unnecessary module imports to avoid potential issues (most likely due to
a "circular import").

### Summary
- add `inference_utils` module
(unstructured/partition/pdf_image/inference_utils.py) to define
unstructured-inference library related utility functions, which will
reduce importing unstructured-inference library functions in other files
- add `conftest.py` in `test_unstructured/partition/pdf_image/`
directory to define fixtures that are available to all tests in the same
directory and its subdirectories

### Testing
CI should pass
2024-02-06 17:11:55 +00:00

79 lines
2.2 KiB
Python

import pytest
from unstructured_inference.inference.elements import EmbeddedTextRegion
@pytest.fixture()
def mock_embedded_text_regions():
return [
EmbeddedTextRegion.from_coords(
x1=453.00277777777774,
y1=317.319341111111,
x2=711.5338541666665,
y2=358.28571222222206,
text="LayoutParser:",
),
EmbeddedTextRegion.from_coords(
x1=726.4778125,
y1=317.319341111111,
x2=760.3308594444444,
y2=357.1698966666667,
text="A",
),
EmbeddedTextRegion.from_coords(
x1=775.2748177777777,
y1=317.319341111111,
x2=917.3579885555555,
y2=357.1698966666667,
text="Unified",
),
EmbeddedTextRegion.from_coords(
x1=932.3019468888888,
y1=317.319341111111,
x2=1071.8426522222221,
y2=357.1698966666667,
text="Toolkit",
),
EmbeddedTextRegion.from_coords(
x1=1086.7866105555556,
y1=317.319341111111,
x2=1141.2105142777777,
y2=357.1698966666667,
text="for",
),
EmbeddedTextRegion.from_coords(
x1=1156.154472611111,
y1=317.319341111111,
x2=1256.334784222222,
y2=357.1698966666667,
text="Deep",
),
EmbeddedTextRegion.from_coords(
x1=437.83888888888885,
y1=367.13322999999986,
x2=610.0171992222222,
y2=406.9837855555556,
text="Learning",
),
EmbeddedTextRegion.from_coords(
x1=624.9611575555555,
y1=367.13322999999986,
x2=741.6754646666665,
y2=406.9837855555556,
text="Based",
),
EmbeddedTextRegion.from_coords(
x1=756.619423,
y1=367.13322999999986,
x2=958.3867708333332,
y2=406.9837855555556,
text="Document",
),
EmbeddedTextRegion.from_coords(
x1=973.3307291666665,
y1=367.13322999999986,
x2=1092.0535042777776,
y2=406.9837855555556,
text="Image",
),
]