mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix [NEX-28]: file_type is None for result_file_type in chunker partition json (#4022)
### Summary Fixes `'NoneType' object has no attribute 'partitioner_shortname'`, which was raised because `result_file_type = self._disambiguate_json_file_type` could return `None` instead of a file type.
This commit is contained in:
parent
5e43e36427
commit
a80decdbd4
@ -1,10 +1,11 @@
|
|||||||
## 0.17.11-dev1
|
## 0.17.11-dev2
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
- Fix type error when `result_file_type` is expected to be a `FileType` but is `None`
|
||||||
- Fix chunking for elements with None text that has AttributeError 'NoneType' object has no attribute 'strip'.
|
- Fix chunking for elements with None text that has AttributeError 'NoneType' object has no attribute 'strip'.
|
||||||
- Invalid elements IDs are not visible in VLM output. Parent-child hierarchy is now retrieved based on unstructured element ID, instead of id injected into HTML code of element.
|
- Invalid elements IDs are not visible in VLM output. Parent-child hierarchy is now retrieved based on unstructured element ID, instead of id injected into HTML code of element.
|
||||||
|
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.17.11-dev1" # pragma: no cover
|
__version__ = "0.17.11-dev2" # pragma: no cover
|
||||||
|
@ -222,18 +222,13 @@ class _FileTypeDetector:
|
|||||||
return FileType.from_mime_type(self._ctx.content_type)
|
return FileType.from_mime_type(self._ctx.content_type)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _disambiguate_json_file_type(self) -> FileType | None:
|
def _disambiguate_json_file_type(self) -> FileType:
|
||||||
"""Disambiguate JSON/NDJSON file-type based on file contents.
|
"""Disambiguate JSON/NDJSON file-type based on file contents."""
|
||||||
|
|
||||||
This method is used when the content-type is `application/json` and the file is not empty.
|
|
||||||
"""
|
|
||||||
if self._ctx.content_type is not None and self._ctx.content_type != "application/json":
|
|
||||||
return None
|
|
||||||
if is_json_processable(file_text=self._ctx.text_head):
|
if is_json_processable(file_text=self._ctx.text_head):
|
||||||
return FileType.JSON
|
return FileType.JSON
|
||||||
if is_ndjson_processable(file_text=self._ctx.text_head):
|
if is_ndjson_processable(file_text=self._ctx.text_head):
|
||||||
return FileType.NDJSON
|
return FileType.NDJSON
|
||||||
return None
|
raise ValueError("Unable to process JSON file")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _file_type_from_guessed_mime_type(self) -> FileType | None:
|
def _file_type_from_guessed_mime_type(self) -> FileType | None:
|
||||||
|
@ -226,7 +226,7 @@ def partition(
|
|||||||
)
|
)
|
||||||
return augment_metadata(elements)
|
return augment_metadata(elements)
|
||||||
|
|
||||||
if file_type.partitioner_shortname == "image":
|
if file_type.partitioner_shortname and file_type.partitioner_shortname == "image":
|
||||||
partition_image = partitioner_loader.get(file_type)
|
partition_image = partitioner_loader.get(file_type)
|
||||||
elements = partition_image(
|
elements = partition_image(
|
||||||
filename=filename,
|
filename=filename,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user