mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-09-26 17:02:31 +00:00
fix: remove root handlers when they exist (#3128)
### Summary In some environments, such as Google Colab, loggers have a root handler that did not mask sensitive values. As a result, secrets such as API keys appeared in the logs. The PR removes root handlers when they exist to ensure sensitive values are handled properly. ### Testing Run the following in a Colab notebook. You should see two log outputs, one with the API key masked and one with it exposed. ``` !pip install unstructured ``` ```python import logging import json from unstructured.ingest.interfaces import ( ChunkingConfig, EmbeddingConfig, PartitionConfig, ProcessorConfig, ReadConfig, ) partition_config = PartitionConfig( partition_by_api=True, api_key="super secret", ) from unstructured.ingest.logger import ingest_log_streaming_init ingest_log_streaming_init(logging.INFO) logger = logging.getLogger("unstructured.ingest") logger.setLevel(logging.INFO) logger.info( f"Running partition node to extract content from json files. " f"Config: {partition_config.to_json()}, " ) ``` Now replace the first cell with the following and rerun the Python code. Only the masked logging output should remain. ``` !git clone https://github.com/Unstructured-IO/unstructured.git && cd unstructured && git checkout fix/rm-log-dupes && pip install -e . ```
This commit is contained in:
parent
54c1e4e57f
commit
1b43102762
@ -1,4 +1,4 @@
|
|||||||
## 0.14.4-dev5
|
## 0.14.4-dev6
|
||||||
|
|
||||||
### Enhancements
|
### Enhancements
|
||||||
|
|
||||||
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
* **Remove root handlers in ingest logger**. Removes root handlers in ingest loggers to ensure secrets aren't accidentally exposed in Colab notebooks.
|
||||||
* **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
|
* **Fix V2 S3 Destination Connector authentication** Fixes bugs with S3 Destination Connector where the connection config was neither registered nor properly deserialized.
|
||||||
* **Clarified dependence on particular version of `python-docx`** Pinned `python-docx` version to ensure a particular method `unstructured` uses is included.
|
* **Clarified dependence on particular version of `python-docx`** Pinned `python-docx` version to ensure a particular method `unstructured` uses is included.
|
||||||
* **Ingest preserves original file extension** Ingest V2 introduced a change that dropped the original extension for upgraded connectors. This reverts that change.
|
* **Ingest preserves original file extension** Ingest V2 introduced a change that dropped the original extension for upgraded connectors. This reverts that change.
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = "0.14.4-dev5" # pragma: no cover
|
__version__ = "0.14.4-dev6" # pragma: no cover
|
||||||
|
@ -94,6 +94,15 @@ class SensitiveFormatter(logging.Formatter):
|
|||||||
return redact_jsons(s)
|
return redact_jsons(s)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_root_handlers(logger: logging.Logger) -> None:
    """Detach every handler currently attached to the root logger.

    Args:
        logger: Any logger; only its ``root`` attribute (the shared root logger) is touched.
            Handlers attached to ``logger`` itself are left in place.
    """
    # NOTE(robinson) - in some environments such as Google Colab, there is a root handler
    # that does not mask secrets, meaning sensitive info such as api keys appear in logs.
    # Removing these when they exist prevents this behavior.
    #
    # Iterate over a snapshot: removeHandler() mutates ``root.handlers`` in place, so looping
    # over the live list would skip every other handler and leave some of them attached.
    for handler in list(logger.root.handlers):
        logger.root.removeHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
def ingest_log_streaming_init(level: int) -> None:
|
def ingest_log_streaming_init(level: int) -> None:
|
||||||
handler = logging.StreamHandler()
|
handler = logging.StreamHandler()
|
||||||
handler.name = "ingest_log_handler"
|
handler.name = "ingest_log_handler"
|
||||||
@ -104,6 +113,7 @@ def ingest_log_streaming_init(level: int) -> None:
|
|||||||
if "ingest_log_handler" not in [h.name for h in logger.handlers]:
|
if "ingest_log_handler" not in [h.name for h in logger.handlers]:
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
remove_root_handlers(logger)
|
||||||
logger.setLevel(level)
|
logger.setLevel(level)
|
||||||
|
|
||||||
|
|
||||||
@ -116,4 +126,5 @@ def make_default_logger(level: int) -> logging.Logger:
|
|||||||
handler.setFormatter(formatter)
|
handler.setFormatter(formatter)
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
logger.setLevel(level)
|
logger.setLevel(level)
|
||||||
|
remove_root_handlers(logger)
|
||||||
return logger
|
return logger
|
||||||
|
Loading…
x
Reference in New Issue
Block a user