2023-10-31 16:02:00 -05:00
|
|
|
import pytest
|
|
|
|
|
2023-12-01 12:56:31 -08:00
|
|
|
from unstructured.partition.pdf_image import pdf
|
2023-11-15 21:41:02 -08:00
|
|
|
from unstructured.partition.utils.constants import PartitionStrategy
|
2023-10-31 16:02:00 -05:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="session")
def chipper_results():
    """Partition the sample layout-parser PDF with the chipper model.

    The fixture is session-scoped, so the partitioning call runs once and its
    result is shared by every test that requests it.

    Returns:
        Whatever ``pdf.partition_pdf`` returns for the sample document when
        called with the hi-res strategy and ``model_name="chipper"``.
    """
    partition_options = {
        "strategy": PartitionStrategy.HI_RES,
        "model_name": "chipper",
    }
    return pdf.partition_pdf(
        "example-docs/layout-parser-paper-fast.pdf",
        **partition_options,
    )
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="session")
def chipper_children(chipper_results):
    """Collect the partitioned elements that reference a parent.

    Args:
        chipper_results: Elements produced by the ``chipper_results`` fixture.

    Returns:
        A list, in the original order, of every element whose
        ``metadata.parent_id`` is not ``None``.
    """
    children = []
    for element in chipper_results:
        # Elements without a parent reference are not part of any hierarchy.
        if element.metadata.parent_id is None:
            continue
        children.append(element)
    return children
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.chipper()
def test_chipper_has_hierarchy(chipper_children):
    """Check that at least one element reports a parent."""
    found_any_child = bool(chipper_children)
    assert found_any_child
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.chipper()
def test_chipper_not_losing_parents(chipper_results, chipper_children):
    """Check that every child's ``parent_id`` matches the ``id`` of a result element.

    Args:
        chipper_results: All elements produced by the ``chipper_results`` fixture.
        chipper_children: The subset of those elements whose
            ``metadata.parent_id`` is not ``None``.
    """
    # Gather the dangling references instead of asserting on a bare boolean,
    # so a failure names the parent ids that could not be resolved.
    missing_parent_ids = [
        child.metadata.parent_id
        for child in chipper_children
        # any() stops at the first match rather than building a full list of
        # matches per child just to test whether it is empty.
        if not any(el.id == child.metadata.parent_id for el in chipper_results)
    ]
    assert not missing_parent_ids, (
        f"parent ids with no matching element: {missing_parent_ids!r}"
    )
|