From 3d21b4098eec7b3864c4be21c70f2388ec643a3c Mon Sep 17 00:00:00 2001 From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> Date: Fri, 10 Mar 2023 18:26:08 +0100 Subject: [PATCH] enhancement: improve `detect_filetype` warning to include filename (#355) * Improve warning to include filename if provided * Update changelog & version --- CHANGELOG.md | 4 ++-- unstructured/__version__.py | 2 +- unstructured/file_utils/filetype.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c65703258..4bbd0ba36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,7 @@ -## 0.5.4-dev1 +## 0.5.4-dev2 ### Enhancements - * Add `FsspecConnector` to easily integrate any existing `fsspec` filesystem as a connector. * Rename `s3_connector.py` to `s3.py` for readability and consistency with the rest of the connectors. @@ -11,6 +10,7 @@ * Adds an `UNSTRUCTURED_LANGUAGE_CHECKS` environment variable to control whether or not language specific checks like vocabulary and POS tagging are applied. Set to `"true"` for higher resolution partitioning and `"false"` for faster processing. +* Improves `detect_filetype` warning to include filename when provided. ### Features diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 7194bf5b9..15cc6c086 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.5.4-dev1" # pragma: no cover +__version__ = "0.5.4-dev2" # pragma: no cover diff --git a/unstructured/file_utils/filetype.py b/unstructured/file_utils/filetype.py index 5685f7e9d..f6ff8902f 100644 --- a/unstructured/file_utils/filetype.py +++ b/unstructured/file_utils/filetype.py @@ -229,7 +229,8 @@ def detect_filetype( return EXT_TO_FILETYPE.get(extension.lower(), filetype) logger.warning( - f"MIME type was {mime_type}. This file type is not currently supported in unstructured.", + f"The MIME type{f' of {filename!r}' if filename else ''} is {mime_type!r}. " + "This file type is not currently supported in unstructured.", ) return FileType.UNK