chore: adding test case for odt tables (#1434)

ODT table extraction already works. This commit adds a table to an existing
example doc (fake.odt) and an accompanying test case that covers it.
This commit is contained in:
Amanda Cameron 2023-09-16 22:29:44 -07:00 committed by GitHub
parent b534b2a6cd
commit a9f18eddb8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 32 additions and 8 deletions

View File

@@ -1,5 +1,12 @@
## 0.10.15
## 0.10.16-dev0
### Enhancements
### Features
### Fixes
## 0.10.15
### Enhancements

Binary file not shown.

View File

@@ -2,7 +2,7 @@ import os
import pathlib
from unstructured.chunking.title import chunk_by_title
from unstructured.documents.elements import Title
from unstructured.documents.elements import Table, Title
from unstructured.partition.json import partition_json
from unstructured.partition.odt import partition_odt
from unstructured.staging.base import elements_to_json
@@ -14,7 +14,16 @@ EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "..", "..", "..", "example-docs
def test_partition_odt_from_filename():
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
elements = partition_odt(filename=filename)
assert elements == [Title("Lorem ipsum dolor sit amet.")]
assert elements == [
Title("Lorem ipsum dolor sit amet."),
Table(
text="\nHeader row Mon Wed"
" Fri\nColor Blue"
" Red Green\nTime 1pm"
" 2pm 3pm\nLeader "
"Sarah Mark Ryan",
),
]
for element in elements:
assert element.metadata.filename == "fake.odt"
@@ -29,8 +38,16 @@ def test_partition_odt_from_file():
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
with open(filename, "rb") as f:
elements = partition_odt(file=f)
assert elements == [Title("Lorem ipsum dolor sit amet.")]
assert elements == [
Title("Lorem ipsum dolor sit amet."),
Table(
text="\nHeader row Mon Wed"
" Fri\nColor Blue"
" Red Green\nTime 1pm"
" 2pm 3pm\nLeader "
"Sarah Mark Ryan",
),
]
def test_partition_odt_from_file_with_metadata_filename():

View File

@@ -554,7 +554,7 @@ def test_auto_partition_works_with_unstructured_jsons_from_file():
def test_auto_partition_odt_from_filename():
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
elements = partition(filename=filename, strategy="hi_res")
assert elements == [Title("Lorem ipsum dolor sit amet.")]
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
def test_auto_partition_odt_from_file():
@@ -562,7 +562,7 @@ def test_auto_partition_odt_from_file():
with open(filename, "rb") as f:
elements = partition(file=f, strategy="hi_res")
assert elements == [Title("Lorem ipsum dolor sit amet.")]
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
@pytest.mark.parametrize(

View File

@@ -1 +1 @@
__version__ = "0.10.15" # pragma: no cover
__version__ = "0.10.16-dev0" # pragma: no cover