mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
chore: adding test case for odt tables (#1434)
ODT table extraction is happening! This commit just adds a table to an existing example doc, along with an accompanying test case.
This commit is contained in:
parent
b534b2a6cd
commit
a9f18eddb8
@ -1,5 +1,12 @@
|
||||
## 0.10.15
|
||||
## 0.10.16-dev0
|
||||
|
||||
### Enhancements
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
||||
## 0.10.15
|
||||
|
||||
### Enhancements
|
||||
|
||||
|
Binary file not shown.
@ -2,7 +2,7 @@ import os
|
||||
import pathlib
|
||||
|
||||
from unstructured.chunking.title import chunk_by_title
|
||||
from unstructured.documents.elements import Title
|
||||
from unstructured.documents.elements import Table, Title
|
||||
from unstructured.partition.json import partition_json
|
||||
from unstructured.partition.odt import partition_odt
|
||||
from unstructured.staging.base import elements_to_json
|
||||
@ -14,7 +14,16 @@ EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "..", "..", "..", "example-docs
|
||||
def test_partition_odt_from_filename():
|
||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||
elements = partition_odt(filename=filename)
|
||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
||||
assert elements == [
|
||||
Title("Lorem ipsum dolor sit amet."),
|
||||
Table(
|
||||
text="\nHeader row Mon Wed"
|
||||
" Fri\nColor Blue"
|
||||
" Red Green\nTime 1pm"
|
||||
" 2pm 3pm\nLeader "
|
||||
"Sarah Mark Ryan",
|
||||
),
|
||||
]
|
||||
for element in elements:
|
||||
assert element.metadata.filename == "fake.odt"
|
||||
|
||||
@ -29,8 +38,16 @@ def test_partition_odt_from_file():
|
||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||
with open(filename, "rb") as f:
|
||||
elements = partition_odt(file=f)
|
||||
|
||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
||||
assert elements == [
|
||||
Title("Lorem ipsum dolor sit amet."),
|
||||
Table(
|
||||
text="\nHeader row Mon Wed"
|
||||
" Fri\nColor Blue"
|
||||
" Red Green\nTime 1pm"
|
||||
" 2pm 3pm\nLeader "
|
||||
"Sarah Mark Ryan",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def test_partition_odt_from_file_with_metadata_filename():
|
||||
|
@ -554,7 +554,7 @@ def test_auto_partition_works_with_unstructured_jsons_from_file():
|
||||
def test_auto_partition_odt_from_filename():
|
||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||
elements = partition(filename=filename, strategy="hi_res")
|
||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
||||
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
|
||||
|
||||
|
||||
def test_auto_partition_odt_from_file():
|
||||
@ -562,7 +562,7 @@ def test_auto_partition_odt_from_file():
|
||||
with open(filename, "rb") as f:
|
||||
elements = partition(file=f, strategy="hi_res")
|
||||
|
||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
||||
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.10.15" # pragma: no cover
|
||||
__version__ = "0.10.16-dev0" # pragma: no cover
|
||||
|
Loading…
x
Reference in New Issue
Block a user