mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix: text kwargs no longer fail with empty string (#413)
* fix: text kwargs no longer fail with empty string * linting
This commit is contained in:
parent
75cf233702
commit
09b52b4fc4
@ -1,4 +1,4 @@
|
||||
## 0.5.8-dev0
|
||||
## 0.5.8-dev1
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -11,6 +11,9 @@
|
||||
### Fixes
|
||||
|
||||
* `convert_file_to_text` now passes through the `source_format` and `target_format` kwargs.
|
||||
* Partitioning functions that accept a `text` kwarg no longer raise an error if an empty
|
||||
string is passed (an empty list of elements is returned instead).
|
||||
* `partition_json` no longer fails if the input is an empty list.
|
||||
|
||||
## 0.5.7
|
||||
|
||||
|
@ -140,6 +140,10 @@ def test_partition_email_from_text():
|
||||
assert elements == EXPECTED_OUTPUT
|
||||
|
||||
|
||||
def test_partition_email_from_text_work_with_empty_string():
|
||||
assert partition_email(text="") == []
|
||||
|
||||
|
||||
def test_partition_email_from_filename_with_embedded_image():
|
||||
filename = os.path.join(DIRECTORY, "..", "..", "example-docs", "fake-email-image-embedded.eml")
|
||||
elements = partition_email(filename=filename, content_source="text/plain")
|
||||
|
@ -41,6 +41,10 @@ def test_partition_html_from_text():
|
||||
assert len(elements) > 0
|
||||
|
||||
|
||||
def test_partition_html_from_text_works_with_empty_string():
|
||||
assert partition_html(text="") == []
|
||||
|
||||
|
||||
class MockResponse:
|
||||
def __init__(self, text, status_code, headers={}):
|
||||
self.text = text
|
||||
|
@ -87,6 +87,14 @@ def test_partition_json_raises_with_none_specified():
|
||||
partition_json()
|
||||
|
||||
|
||||
def test_partition_json_works_with_empty_string():
|
||||
assert partition_json(text="") == []
|
||||
|
||||
|
||||
def test_partition_json_works_with_empty_list():
|
||||
assert partition_json(text="[]") == []
|
||||
|
||||
|
||||
def test_partition_json_raises_with_too_many_specified():
|
||||
path = os.path.join(DIRECTORY, "..", "..", "example-docs", "fake-text.txt")
|
||||
elements = partition(filename=path)
|
||||
|
@ -59,6 +59,10 @@ def test_partition_text_from_text():
|
||||
assert elements == EXPECTED_OUTPUT
|
||||
|
||||
|
||||
def test_partition_text_from_text_works_with_empty_string():
|
||||
assert partition_text(text="") == []
|
||||
|
||||
|
||||
def test_partition_text_raises_with_none_specified():
|
||||
with pytest.raises(ValueError):
|
||||
partition_text()
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.5.8-dev0" # pragma: no cover
|
||||
__version__ = "0.5.8-dev1" # pragma: no cover
|
||||
|
@ -168,6 +168,9 @@ def partition_email(
|
||||
f"Valid content sources are: {VALID_CONTENT_SOURCES}",
|
||||
)
|
||||
|
||||
if text is not None and text.strip() == "" and not file and not filename:
|
||||
return []
|
||||
|
||||
# Verify that only one of the arguments was provided
|
||||
exactly_one(filename=filename, file=file, text=text)
|
||||
|
||||
|
@ -45,6 +45,9 @@ def partition_html(
|
||||
parser
|
||||
The parser to use for parsing the HTML document. If None, default parser will be used.
|
||||
"""
|
||||
if text is not None and text.strip() == "" and not file and not filename and not url:
|
||||
return []
|
||||
|
||||
# Verify that only one of the arguments was provided
|
||||
exactly_one(filename=filename, file=file, text=text, url=url)
|
||||
|
||||
|
@ -6,7 +6,7 @@ from unstructured.documents.elements import Element
|
||||
from unstructured.partition.common import exactly_one
|
||||
from unstructured.staging.base import dict_to_elements
|
||||
|
||||
LIST_OF_DICTS_PATTERN = r"\A\s*\[\s*{"
|
||||
LIST_OF_DICTS_PATTERN = r"\A\s*\[\s*{?"
|
||||
|
||||
|
||||
def partition_json(
|
||||
@ -15,6 +15,9 @@ def partition_json(
|
||||
text: Optional[str] = None,
|
||||
) -> List[Element]:
|
||||
"""Partitions a .json document into its constituent elements."""
|
||||
if text is not None and text.strip() == "" and not file and not filename:
|
||||
return []
|
||||
|
||||
exactly_one(filenmae=filename, file=file, text=text)
|
||||
|
||||
if filename is not None:
|
||||
|
@ -43,6 +43,8 @@ def partition_text(
|
||||
encoding
|
||||
The encoding method used to decode the text input. If None, utf-8 will be used.
|
||||
"""
|
||||
if text is not None and text.strip() == "" and not file and not filename:
|
||||
return []
|
||||
|
||||
# Verify that only one of the arguments was provided
|
||||
exactly_one(filename=filename, file=file, text=text)
|
||||
|
Loading…
x
Reference in New Issue
Block a user