fix [NEX-28]: file_type is None for result_file_type in chunker partition json (#4022)

### Summary
`'NoneType' object has no attribute 'partitioner_shortname'` was raised because
`result_file_type = self._disambiguate_json_file_type` could return `None`
as the file type.
This commit is contained in:
Yuming Long 2025-06-13 08:19:09 -07:00 committed by GitHub
parent 5e43e36427
commit a80decdbd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 7 additions and 11 deletions

View File

@ -1,10 +1,11 @@
## 0.17.11-dev1
## 0.17.11-dev2
### Enhancements
### Features
### Fixes
- Fix type error when `result_file_type` is expected to be a `FileType` but is `None`
- Fix chunking for elements whose text is `None`, which raised AttributeError: 'NoneType' object has no attribute 'strip'.
- Invalid element IDs are not visible in VLM output. Parent-child hierarchy is now retrieved based on the unstructured element ID, instead of the id injected into the HTML code of the element.

View File

@ -1 +1 @@
__version__ = "0.17.11-dev1" # pragma: no cover
__version__ = "0.17.11-dev2" # pragma: no cover

View File

@ -222,18 +222,13 @@ class _FileTypeDetector:
return FileType.from_mime_type(self._ctx.content_type)
@property
def _disambiguate_json_file_type(self) -> FileType | None:
"""Disambiguate JSON/NDJSON file-type based on file contents.
This method is used when the content-type is `application/json` and the file is not empty.
"""
if self._ctx.content_type is not None and self._ctx.content_type != "application/json":
return None
def _disambiguate_json_file_type(self) -> FileType:
"""Disambiguate JSON/NDJSON file-type based on file contents."""
if is_json_processable(file_text=self._ctx.text_head):
return FileType.JSON
if is_ndjson_processable(file_text=self._ctx.text_head):
return FileType.NDJSON
return None
raise ValueError("Unable to process JSON file")
@property
def _file_type_from_guessed_mime_type(self) -> FileType | None:

View File

@ -226,7 +226,7 @@ def partition(
)
return augment_metadata(elements)
if file_type.partitioner_shortname == "image":
if file_type.partitioner_shortname and file_type.partitioner_shortname == "image":
partition_image = partitioner_loader.get(file_type)
elements = partition_image(
filename=filename,