diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7982a7165..51f9cbf5b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,4 @@
-## 0.14.4-dev5
+## 0.14.4-dev6
 
 ### Enhancements
 
@@ -12,6 +12,7 @@
 
 ### Fixes
 
+* **Remove root handlers in ingest logger** Removes root handlers in ingest loggers to ensure secrets aren't accidentally exposed in Colab notebooks.
 * **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
 * **Clarified dependence on particular version of `python-docx`** Pinned `python-docx` version to ensure a particular method `unstructured` uses is included.
 * **Ingest preserves original file extension** Ingest V2 introduced a change that dropped the original extension for upgraded connectors. This reverts that change.
diff --git a/unstructured/__version__.py b/unstructured/__version__.py
index ff19fe345..c14a58bf3 100644
--- a/unstructured/__version__.py
+++ b/unstructured/__version__.py
@@ -1 +1 @@
-__version__ = "0.14.4-dev5"  # pragma: no cover
+__version__ = "0.14.4-dev6"  # pragma: no cover
diff --git a/unstructured/ingest/logger.py b/unstructured/ingest/logger.py
index 6970c0ef0..ed4e7180e 100644
--- a/unstructured/ingest/logger.py
+++ b/unstructured/ingest/logger.py
@@ -94,6 +94,21 @@ class SensitiveFormatter(logging.Formatter):
         return redact_jsons(s)
 
 
+def remove_root_handlers(logger: logging.Logger) -> None:
+    """Remove every handler attached to the root logger (``logger.root``).
+
+    Args:
+        logger: Any logger; only its ``root`` attribute is used.
+    """
+    # NOTE(robinson) - in some environments such as Google Colab, there is a root handler
+    # that does not mask secrets, meaning sensitive info such as api keys appear in logs.
+    # Removing these when they exist prevents this behavior.
+    # Iterate over a copy: removeHandler() mutates root.handlers in place, so looping over
+    # the live list would skip every other handler and leave some attached.
+    for handler in list(logger.root.handlers):
+        logger.root.removeHandler(handler)
+
+
 def ingest_log_streaming_init(level: int) -> None:
     handler = logging.StreamHandler()
     handler.name = "ingest_log_handler"
@@ -104,6 +119,7 @@ def ingest_log_streaming_init(level: int) -> None:
     if "ingest_log_handler" not in [h.name for h in logger.handlers]:
         logger.addHandler(handler)
 
+    remove_root_handlers(logger)
     logger.setLevel(level)
 
 
@@ -116,4 +132,5 @@ def make_default_logger(level: int) -> logging.Logger:
     handler.setFormatter(formatter)
     logger.addHandler(handler)
     logger.setLevel(level)
+    remove_root_handlers(logger)
     return logger