diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d18863cf..ba87df9e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,11 @@ -## 0.17.11-dev1 +## 0.17.11-dev2 ### Enhancements ### Features ### Fixes +- Fix type error when `result_file_type` is expected to be a `FileType` but is `None` - Fix chunking for elements with None text that has AttributeError 'NoneType' object has no attribute 'strip'. - Invalid elements IDs are not visible in VLM output. Parent-child hierarchy is now retrieved based on unstructured element ID, instead of id injected into HTML code of element. diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 1909f0042..b5c0cb697 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.17.11-dev1" # pragma: no cover +__version__ = "0.17.11-dev2" # pragma: no cover diff --git a/unstructured/file_utils/filetype.py b/unstructured/file_utils/filetype.py index 81557562f..1206b1e8b 100644 --- a/unstructured/file_utils/filetype.py +++ b/unstructured/file_utils/filetype.py @@ -222,18 +222,13 @@ class _FileTypeDetector: return FileType.from_mime_type(self._ctx.content_type) @property - def _disambiguate_json_file_type(self) -> FileType | None: - """Disambiguate JSON/NDJSON file-type based on file contents. - - This method is used when the content-type is `application/json` and the file is not empty. - """ - if self._ctx.content_type is not None and self._ctx.content_type != "application/json": - return None + def _disambiguate_json_file_type(self) -> FileType: + """Disambiguate JSON/NDJSON file-type based on file contents.""" if is_json_processable(file_text=self._ctx.text_head): return FileType.JSON if is_ndjson_processable(file_text=self._ctx.text_head): return FileType.NDJSON - return None + raise ValueError("Unable to process JSON file") @property def _file_type_from_guessed_mime_type(self) -> FileType | None: diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py index fa4c05034..d8555e824 100644 --- a/unstructured/partition/auto.py +++ b/unstructured/partition/auto.py @@ -226,7 +226,7 @@ def partition( ) return augment_metadata(elements) - if file_type.partitioner_shortname == "image": + if file_type.partitioner_shortname and file_type.partitioner_shortname == "image": partition_image = partitioner_loader.get(file_type) elements = partition_image( filename=filename,