2023-06-23 20:45:31 +02:00
|
|
|
from unstructured.documents.elements import Title
|
|
|
|
from unstructured.partition.org import partition_org
|
|
|
|
|
|
|
|
|
|
|
|
def test_partition_org_from_filename(filename="example-docs/README.org"):
    """Partition an org file by path and check the leading element.

    The first element must equal ``Title("Example Docs")`` and its metadata
    must report the ``text/org`` filetype.
    """
    parsed = partition_org(filename=filename)
    leading = parsed[0]

    assert leading == Title("Example Docs")
    assert leading.metadata.filetype == "text/org"
|
|
|
|
|
|
|
|
|
2023-07-05 15:02:22 -05:00
|
|
|
def test_partition_org_from_filename_with_metadata_filename(filename="example-docs/README.org"):
    """Partition an org file by path while passing ``metadata_filename="test"``.

    The first element must equal ``Title("Example Docs")`` and its metadata
    filename must be the supplied value ``"test"``.
    """
    parsed = partition_org(filename=filename, metadata_filename="test")
    leading = parsed[0]

    assert leading == Title("Example Docs")
    assert leading.metadata.filename == "test"
|
|
|
|
|
|
|
|
|
2023-06-23 20:45:31 +02:00
|
|
|
def test_partition_org_from_file(filename="example-docs/README.org"):
    """Partition an org document passed as an open binary file object.

    The first element must equal ``Title("Example Docs")`` and its metadata
    must report the ``text/org`` filetype.
    """
    with open(filename, "rb") as org_file:
        parsed = partition_org(file=org_file)

    leading = parsed[0]
    assert leading == Title("Example Docs")
    assert leading.metadata.filetype == "text/org"
|
2023-06-30 09:44:46 -05:00
|
|
|
|
|
|
|
|
2023-07-05 15:02:22 -05:00
|
|
|
def test_partition_org_from_file_with_metadata_filename(filename="example-docs/README.org"):
    """Partition an open binary file while passing ``metadata_filename="test"``.

    The first element must equal ``Title("Example Docs")`` and its metadata
    filename must be the supplied value ``"test"``.
    """
    with open(filename, "rb") as org_file:
        parsed = partition_org(file=org_file, metadata_filename="test")

    leading = parsed[0]
    assert leading == Title("Example Docs")
    assert leading.metadata.filename == "test"
|
|
|
|
|
|
|
|
|
2023-06-30 09:44:46 -05:00
|
|
|
def test_partition_org_from_filename_exclude_metadata(filename="example-docs/README.org"):
    """Check that ``include_metadata=False`` leaves every element's metadata empty.

    Partitions *filename* by path and asserts that ``metadata.to_dict()`` is
    ``{}`` for each returned element.
    """
    elements = partition_org(filename=filename, include_metadata=False)

    # Guard against a vacuous pass: with no elements the loop below checks nothing.
    assert elements
    for element in elements:
        assert element.metadata.to_dict() == {}
|
|
|
|
|
|
|
|
|
|
|
|
def test_partition_org_from_file_exclude_metadata(filename="example-docs/README.org"):
    """Check that ``include_metadata=False`` leaves every element's metadata empty.

    Partitions the document from an open binary file object and asserts that
    ``metadata.to_dict()`` is ``{}`` for each returned element.
    """
    with open(filename, "rb") as f:
        elements = partition_org(file=f, include_metadata=False)

    # Guard against a vacuous pass: with no elements the loop below checks nothing.
    assert elements
    for element in elements:
        assert element.metadata.to_dict() == {}
|