mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix: throw validation error when json is passed with invalid unstructured json (#4002)
### Notes
Adds validation for `json` / `ndjson` input that does not match the Unstructured schema.

### Testing
Manually tested the serverless API with example JSON payloads (payload = resulting HTTP status):
```
test_length = [] = 200
test_invalid = [{"invalid": "schema"}] = 422
test_invalid_ndjson = {"hi": "there"} = 422
test_chunk = [{"type":"Header","element_id":"a23fdadef9277f217563e217ebd074d5" ... = 200
```
This commit is contained in:
parent
e3417d7e98
commit
570ee078a4
@ -6,6 +6,7 @@
|
||||
|
||||
### Fixes
|
||||
- **Fix image extraction for PNG files.** When `extract_image_block_to_payload` is True, and the image is a PNG, we get a Pillow error. We need to remove the PNG transparency layer before saving the image.
|
||||
- **Throw validation error when json is passed with invalid unstructured json.**
|
||||
|
||||
## 0.17.6
|
||||
|
||||
|
@ -187,6 +187,11 @@ def test_partition_json_works_with_empty_string():
|
||||
assert partition_json(text="") == []
|
||||
|
||||
|
||||
def test_partition_json_fails_with_empty_item():
|
||||
with pytest.raises(ValueError):
|
||||
partition_json(text="{}")
|
||||
|
||||
|
||||
def test_partition_json_works_with_empty_list():
|
||||
assert partition_json(text="[]") == []
|
||||
|
||||
@ -288,6 +293,12 @@ def test_partition_json_from_text_prefers_metadata_last_modified():
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_partition_json_raises_with_unprocessable_json_array():
|
||||
text = '[{"invalid": "schema"}]'
|
||||
with pytest.raises(ValueError):
|
||||
partition_json(text=text)
|
||||
|
||||
|
||||
def test_partition_json_raises_with_unprocessable_json():
|
||||
# NOTE(robinson) - This is unprocessable because it is not a list of dicts,
|
||||
# per the Unstructured ISD format
|
||||
|
@ -189,8 +189,14 @@ def test_partition_ndjson_works_with_empty_string():
|
||||
assert partition_ndjson(text="") == []
|
||||
|
||||
|
||||
def test_partition_ndjson_works_with_empty_list():
|
||||
assert partition_ndjson(text="{}") == []
|
||||
def test_partition_ndjson_fails_with_empty_item():
|
||||
with pytest.raises(ValueError):
|
||||
partition_ndjson(text="{}")
|
||||
|
||||
|
||||
def test_partition_ndjson_fails_with_empty_list():
|
||||
with pytest.raises(ValueError):
|
||||
partition_ndjson(text="[]")
|
||||
|
||||
|
||||
def test_partition_ndjson_raises_with_too_many_specified():
|
||||
@ -293,6 +299,12 @@ def test_partition_ndjson_from_text_prefers_metadata_last_modified():
|
||||
# ------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_partition_json_raises_with_unprocessable_json():
|
||||
text = '{"invalid": "schema"}'
|
||||
with pytest.raises(ValueError):
|
||||
partition_ndjson(text=text)
|
||||
|
||||
|
||||
def test_partition_json_raises_with_invalid_json():
|
||||
text = '[{"hi": "there"}]]'
|
||||
with pytest.raises(ValueError):
|
||||
|
@ -74,6 +74,11 @@ def partition_json(
|
||||
try:
|
||||
element_dicts = json.loads(file_text)
|
||||
elements = elements_from_dicts(element_dicts)
|
||||
# if we found at least one json element, but no unstructured elements were found, throw 422
|
||||
if len(element_dicts) > 0 and len(elements) == 0:
|
||||
raise ValueError(
|
||||
"JSON cannot be partitioned. Schema does not match the Unstructured schema.",
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError("Not a valid json")
|
||||
|
||||
|
@ -75,6 +75,11 @@ def partition_ndjson(
|
||||
try:
|
||||
element_dicts = ndjson_loads(file_text)
|
||||
elements = elements_from_dicts(element_dicts)
|
||||
# if we found at least one json element, but no unstructured elements were found, throw 422
|
||||
if len(element_dicts) > 0 and len(elements) == 0:
|
||||
raise ValueError(
|
||||
"JSON cannot be partitioned. Schema does not match the Unstructured schema.",
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
raise ValueError("Not a valid ndjson")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user