mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix(auto): quick fix for auto test failing in CI (#3715)
Better fix to follow.
This commit is contained in:
parent
06c85235ee
commit
2f496f867c
@ -1,4 +1,4 @@
|
||||
## 0.15.14-dev13
|
||||
## 0.15.14-dev14
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
* **Remove obsolete min_partition/max_partition args from TXT and EML.** The legacy `min_partition` and `max_partition` parameters were an initial rough implementation of chunking but now interfere with chunking and are unused. Remove those parameters from `partition_text()` and `partition_email()`.
|
||||
* **Remove double-decoration on EML and MSG.** Refactor these partitioners to rely on the new `@apply_metadata()` decorator operating on partitioners they delegate to (TXT, HTML, and all others for attachments) and remove direct decoration from EML and MSG.
|
||||
* **Remove double-decoration for PPT.** Remove decorators from the delegating PPT partitioner.
|
||||
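* **Quick-fix CI error in auto test-filetype.** Parametrize the auto-partition file-type test with an explicit example document for each file type instead of scanning the example-docs directory for the first file with a matching extension. A better fix will follow shortly.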
|
||||
|
||||
## 0.15.13
|
||||
|
||||
|
@ -1207,35 +1207,39 @@ def test_auto_partition_overwrites_any_filetype_applied_by_file_specific_partiti
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"file_type",
|
||||
("file_name", "file_type"),
|
||||
[
|
||||
t
|
||||
for t in FileType
|
||||
if t
|
||||
not in (
|
||||
FileType.EMPTY,
|
||||
FileType.JSON,
|
||||
FileType.UNK,
|
||||
FileType.WAV,
|
||||
FileType.XLS,
|
||||
FileType.ZIP,
|
||||
)
|
||||
and t.partitioner_shortname != "image"
|
||||
("stanley-cups.csv", FileType.CSV),
|
||||
("simple.doc", FileType.DOC),
|
||||
("simple.docx", FileType.DOCX),
|
||||
("fake-email.eml", FileType.EML),
|
||||
("simple.epub", FileType.EPUB),
|
||||
("fake-html.html", FileType.HTML),
|
||||
("README.md", FileType.MD),
|
||||
("fake-email.msg", FileType.MSG),
|
||||
("simple.odt", FileType.ODT),
|
||||
("pdf/DA-1p.pdf", FileType.PDF),
|
||||
("fake-power-point.ppt", FileType.PPT),
|
||||
("simple.pptx", FileType.PPTX),
|
||||
("README.rst", FileType.RST),
|
||||
("fake-doc.rtf", FileType.RTF),
|
||||
("stanley-cups.tsv", FileType.TSV),
|
||||
("fake-text.txt", FileType.TXT),
|
||||
("tests-example.xls", FileType.XLSX),
|
||||
("stanley-cups.xlsx", FileType.XLSX),
|
||||
("factbook.xml", FileType.XML),
|
||||
],
|
||||
)
|
||||
def test_auto_partition_applies_the_correct_filetype_for_all_filetypes(file_type: FileType):
|
||||
def test_auto_partition_applies_the_correct_filetype_for_all_filetypes(
|
||||
file_name: str, file_type: FileType
|
||||
):
|
||||
file_path = example_doc_path(file_name)
|
||||
partition_fn_name = file_type.partitioner_function_name
|
||||
module = import_module(file_type.partitioner_module_qname)
|
||||
partition_fn = getattr(module, partition_fn_name)
|
||||
|
||||
# -- partition the first example-doc with the extension for this filetype --
|
||||
elements: list[Element] = []
|
||||
doc_path = example_doc_path("pdf") if file_type == FileType.PDF else example_doc_path("")
|
||||
extensions = file_type._extensions
|
||||
for file in pathlib.Path(doc_path).iterdir():
|
||||
if file.is_file() and file.suffix in extensions:
|
||||
elements = partition_fn(str(file))
|
||||
break
|
||||
# -- partition the example-doc for this filetype --
|
||||
elements = partition_fn(file_path)
|
||||
|
||||
assert elements
|
||||
assert all(
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.15.14-dev13" # pragma: no cover
|
||||
__version__ = "0.15.14-dev14" # pragma: no cover
|
||||
|
Loading…
x
Reference in New Issue
Block a user