mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
chore: adding test case for odt tables (#1434)
ODT table extraction is already working. This commit adds a table to an existing example doc and an accompanying test case.
This commit is contained in:
parent
b534b2a6cd
commit
a9f18eddb8
@ -1,5 +1,12 @@
|
|||||||
## 0.10.15
|
## 0.10.16-dev0
|
||||||
|
|
||||||
|
### Enhancements
|
||||||
|
|
||||||
|
### Features
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
|
## 0.10.15
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
|
Binary file not shown.
@ -2,7 +2,7 @@ import os
|
|||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from unstructured.chunking.title import chunk_by_title
|
from unstructured.chunking.title import chunk_by_title
|
||||||
from unstructured.documents.elements import Title
|
from unstructured.documents.elements import Table, Title
|
||||||
from unstructured.partition.json import partition_json
|
from unstructured.partition.json import partition_json
|
||||||
from unstructured.partition.odt import partition_odt
|
from unstructured.partition.odt import partition_odt
|
||||||
from unstructured.staging.base import elements_to_json
|
from unstructured.staging.base import elements_to_json
|
||||||
@ -14,7 +14,16 @@ EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "..", "..", "..", "example-docs
|
|||||||
def test_partition_odt_from_filename():
|
def test_partition_odt_from_filename():
|
||||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||||
elements = partition_odt(filename=filename)
|
elements = partition_odt(filename=filename)
|
||||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
assert elements == [
|
||||||
|
Title("Lorem ipsum dolor sit amet."),
|
||||||
|
Table(
|
||||||
|
text="\nHeader row Mon Wed"
|
||||||
|
" Fri\nColor Blue"
|
||||||
|
" Red Green\nTime 1pm"
|
||||||
|
" 2pm 3pm\nLeader "
|
||||||
|
"Sarah Mark Ryan",
|
||||||
|
),
|
||||||
|
]
|
||||||
for element in elements:
|
for element in elements:
|
||||||
assert element.metadata.filename == "fake.odt"
|
assert element.metadata.filename == "fake.odt"
|
||||||
|
|
||||||
@ -29,8 +38,16 @@ def test_partition_odt_from_file():
|
|||||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
elements = partition_odt(file=f)
|
elements = partition_odt(file=f)
|
||||||
|
assert elements == [
|
||||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
Title("Lorem ipsum dolor sit amet."),
|
||||||
|
Table(
|
||||||
|
text="\nHeader row Mon Wed"
|
||||||
|
" Fri\nColor Blue"
|
||||||
|
" Red Green\nTime 1pm"
|
||||||
|
" 2pm 3pm\nLeader "
|
||||||
|
"Sarah Mark Ryan",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_partition_odt_from_file_with_metadata_filename():
|
def test_partition_odt_from_file_with_metadata_filename():
|
||||||
|
@ -554,7 +554,7 @@ def test_auto_partition_works_with_unstructured_jsons_from_file():
|
|||||||
def test_auto_partition_odt_from_filename():
|
def test_auto_partition_odt_from_filename():
|
||||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.odt")
|
||||||
elements = partition(filename=filename, strategy="hi_res")
|
elements = partition(filename=filename, strategy="hi_res")
|
||||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
|
||||||
|
|
||||||
|
|
||||||
def test_auto_partition_odt_from_file():
|
def test_auto_partition_odt_from_file():
|
||||||
@ -562,7 +562,7 @@ def test_auto_partition_odt_from_file():
|
|||||||
with open(filename, "rb") as f:
|
with open(filename, "rb") as f:
|
||||||
elements = partition(file=f, strategy="hi_res")
|
elements = partition(file=f, strategy="hi_res")
|
||||||
|
|
||||||
assert elements == [Title("Lorem ipsum dolor sit amet.")]
|
assert elements[0] == Title("Lorem ipsum dolor sit amet.")
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.10.15" # pragma: no cover
|
__version__ = "0.10.16-dev0" # pragma: no cover
|
||||||
|
Loading…
x
Reference in New Issue
Block a user