mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
fix [NEX-28]: file_type is None for result_file_type in chunker partition json (#4022)
### Summary Fixes `'NoneType' object has no attribute 'partitioner_shortname'`, which occurred because `result_file_type = self._disambiguate_json_file_type` could return `None` instead of a file type.
This commit is contained in:
parent
5e43e36427
commit
a80decdbd4
@ -1,10 +1,11 @@
|
||||
## 0.17.11-dev1
|
||||
## 0.17.11-dev2
|
||||
|
||||
### Enhancements
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
- Fix type error when `result_file_type` is expected to be a `FileType` but is `None`
|
||||
- Fix chunking for elements whose text is `None`, which raised `AttributeError: 'NoneType' object has no attribute 'strip'`.
|
||||
- Invalid element IDs are not visible in VLM output. The parent-child hierarchy is now retrieved based on the unstructured element ID, instead of the id injected into the HTML code of the element.
|
||||
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.17.11-dev1" # pragma: no cover
|
||||
__version__ = "0.17.11-dev2" # pragma: no cover
|
||||
|
@ -222,18 +222,13 @@ class _FileTypeDetector:
|
||||
return FileType.from_mime_type(self._ctx.content_type)
|
||||
|
||||
@property
|
||||
def _disambiguate_json_file_type(self) -> FileType | None:
|
||||
"""Disambiguate JSON/NDJSON file-type based on file contents.
|
||||
|
||||
This method is used when the content-type is `application/json` and the file is not empty.
|
||||
"""
|
||||
if self._ctx.content_type is not None and self._ctx.content_type != "application/json":
|
||||
return None
|
||||
def _disambiguate_json_file_type(self) -> FileType:
|
||||
"""Disambiguate JSON/NDJSON file-type based on file contents."""
|
||||
if is_json_processable(file_text=self._ctx.text_head):
|
||||
return FileType.JSON
|
||||
if is_ndjson_processable(file_text=self._ctx.text_head):
|
||||
return FileType.NDJSON
|
||||
return None
|
||||
raise ValueError("Unable to process JSON file")
|
||||
|
||||
@property
|
||||
def _file_type_from_guessed_mime_type(self) -> FileType | None:
|
||||
|
@ -226,7 +226,7 @@ def partition(
|
||||
)
|
||||
return augment_metadata(elements)
|
||||
|
||||
if file_type.partitioner_shortname == "image":
|
||||
if file_type.partitioner_shortname and file_type.partitioner_shortname == "image":
|
||||
partition_image = partitioner_loader.get(file_type)
|
||||
elements = partition_image(
|
||||
filename=filename,
|
||||
|
Loading…
x
Reference in New Issue
Block a user