mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-09-25 16:29:53 +00:00
fix: remove root handlers when they exist (#3128)
### Summary In some environments, such as Google Colab, loggers have a root handler that did not mask sensitive values. As a result, secrets such as API keys appeared in the logs. The PR removes root handlers when they exist to ensure sensitive values are handled properly. ### Testing Run the following in a Colab notebook. You should see two log outputs, one with the API key masked and one with it exposed. ``` !pip install unstructured ``` ```python import logging import json from unstructured.ingest.interfaces import ( ChunkingConfig, EmbeddingConfig, PartitionConfig, ProcessorConfig, ReadConfig, ) partition_config = PartitionConfig( partition_by_api=True, api_key="super secret", ) from unstructured.ingest.logger import ingest_log_streaming_init ingest_log_streaming_init(logging.INFO) logger = logging.getLogger("unstructured.ingest") logger.setLevel(logging.INFO) logger.info( f"Running partition node to extract content from json files. " f"Config: {partition_config.to_json()}, " ) ``` Now replace the first cell with the following and rerun the Python code. Only the masked logging output should remain. ``` !git clone https://github.com/Unstructured-IO/unstructured.git && cd unstructured && git checkout fix/rm-log-dupes && pip install -e . ```
This commit is contained in:
parent
54c1e4e57f
commit
1b43102762
@ -1,4 +1,4 @@
|
||||
## 0.14.4-dev5
|
||||
## 0.14.4-dev6
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
* **Remove root handlers in ingest logger**. Removes root handlers in ingest loggers to ensure secrets aren't accidentally exposed in Colab notebooks.
|
||||
* **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
|
||||
* **Clarified dependence on particular version of `python-docx`** Pinned `python-docx` version to ensure a particular method `unstructured` uses is included.
|
||||
* **Ingest preserves original file extension** Ingest V2 introduced a change that dropped the original extension for upgraded connectors. This reverts that change.
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.14.4-dev5" # pragma: no cover
|
||||
__version__ = "0.14.4-dev6" # pragma: no cover
|
||||
|
@ -94,6 +94,15 @@ class SensitiveFormatter(logging.Formatter):
|
||||
return redact_jsons(s)
|
||||
|
||||
|
||||
def remove_root_handlers(logger: logging.Logger) -> None:
    """Detach every handler currently attached to the root logger.

    Args:
        logger: Any logger instance; only its ``root`` attribute (the root
            logger) is touched. The logger's own handlers are left alone.
    """
    # NOTE(robinson) - in some environments such as Google Colab, there is a root handler
    # that does not mask secrets, meaning sensitive info such as api keys appear in logs.
    # Removing these when they exist prevents this behavior
    root = logger.root
    # Iterate over a snapshot: removeHandler() mutates root.handlers in place, and
    # looping over the live list would skip every other handler, leaving some attached.
    for handler in list(root.handlers):
        root.removeHandler(handler)
|
||||
|
||||
|
||||
def ingest_log_streaming_init(level: int) -> None:
|
||||
handler = logging.StreamHandler()
|
||||
handler.name = "ingest_log_handler"
|
||||
@ -104,6 +113,7 @@ def ingest_log_streaming_init(level: int) -> None:
|
||||
if "ingest_log_handler" not in [h.name for h in logger.handlers]:
|
||||
logger.addHandler(handler)
|
||||
|
||||
remove_root_handlers(logger)
|
||||
logger.setLevel(level)
|
||||
|
||||
|
||||
@ -116,4 +126,5 @@ def make_default_logger(level: int) -> logging.Logger:
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(level)
|
||||
remove_root_handlers(logger)
|
||||
return logger
|
||||
|
Loading…
x
Reference in New Issue
Block a user