mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-12-28 07:33:36 +00:00
fix: allow partition_email to process emails with no content (#797)
* version and changelog
* ingest-test-fixtures-update
This commit is contained in:
parent
8683e2695c
commit
901ef16835
@ -1,4 +1,4 @@
|
||||
## 0.7.8-dev0
|
||||
## 0.7.8-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* `partition_email` now works if there is no message content
|
||||
* Updates the `"fast"` strategy for `partition_pdf` so that it's able to recursively
|
||||
* Adds recursive functionality to all fsspec connectors
|
||||
* Adds generic `--recursive` ingest flag
|
||||
|
||||
@ -322,7 +322,7 @@ def test_convert_to_iso_8601(time, expected):
|
||||
assert iso_time == expected
|
||||
|
||||
|
||||
def test_partition_email_raises_with_no_html_content():
|
||||
def test_partition_email_still_works_with_no_content():
|
||||
filename = os.path.join(EXAMPLE_DOCS_DIRECTORY, "email-no-html-content-1.eml")
|
||||
with pytest.raises(ValueError):
|
||||
partition_email(filename=filename)
|
||||
elements = partition_email(filename=filename)
|
||||
assert elements == []
|
||||
|
||||
@ -1 +1 @@
|
||||
__version__ = "0.7.8-dev0" # pragma: no cover
|
||||
__version__ = "0.7.8-dev1" # pragma: no cover
|
||||
|
||||
@ -283,9 +283,9 @@ def partition_email(
|
||||
|
||||
content = content_map.get(content_source, "")
|
||||
if not content:
|
||||
raise ValueError(f"{content_source} content not found in email")
|
||||
elements = []
|
||||
|
||||
if content_source == "text/html":
|
||||
elif content_source == "text/html":
|
||||
# NOTE(robinson) - In the .eml files, the HTML content gets stored in a format that
|
||||
# looks like the following, resulting in extraneous "=" characters in the output if
|
||||
# you don't clean it up
|
||||
@ -316,6 +316,7 @@ def partition_email(
|
||||
break
|
||||
except (UnicodeDecodeError, UnicodeError):
|
||||
continue
|
||||
|
||||
elif content_source == "text/plain":
|
||||
list_content = split_by_paragraph(content)
|
||||
elements = partition_text(text=content, encoding=encoding)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user