rfctr(auto): improve typing and organize auto tests (#3355)

**Summary**
In preparation for further work on auto-partitioning (`partition()`),
improve typing and organize `test_auto.py` by introducing categories.
This commit is contained in:
Steve Canny 2024-07-08 14:25:17 -07:00 committed by GitHub
parent 609a08a95f
commit d48fa3b163
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 822 additions and 642 deletions

View File

@@ -1,4 +1,4 @@
-## 0.14.10-dev12
+## 0.14.10-dev13
### Enhancements

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import base64
import logging
import math
@@ -17,6 +19,7 @@ from unstructured.chunking.title import chunk_by_title
from unstructured.documents.coordinates import PixelSpace
from unstructured.documents.elements import (
CoordinatesMetadata,
Element,
ElementMetadata,
ElementType,
Footer,
@@ -1182,11 +1185,14 @@ def test_extractable_elements_repair_invalid_pdf_structure(filename, expected_lo
def assert_element_extraction(
elements, extract_image_block_types, extract_image_block_to_payload, tmpdir
elements: list[Element],
extract_image_block_types: list[str],
extract_image_block_to_payload: bool,
tmpdir: str,
):
extracted_elements = []
extracted_elements: list[list[Element]] = []
for el_type in extract_image_block_types:
extracted_elements_by_type = []
extracted_elements_by_type: list[Element] = []
for el in elements:
if el.category == el_type:
extracted_elements_by_type.append(el)

File diff suppressed because it is too large Load Diff

View File

@@ -1 +1 @@
-__version__ = "0.14.10-dev12" # pragma: no cover
+__version__ = "0.14.10-dev13" # pragma: no cover