mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-03 15:11:30 +00:00
79 lines
2.3 KiB
Python
79 lines
2.3 KiB
Python
![]() |
import json
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from unstructured.ingest.logger import (
|
||
|
default_is_data_sensitive,
|
||
|
hide_sensitive_fields,
|
||
|
redact_jsons,
|
||
|
)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    ("key", "value", "is_sensitive"),
    [
        # An ordinary key/value pair is expected to be reported as not sensitive.
        ("username", "john_smith", False),
        # These key names are expected to be flagged when they carry a value.
        ("password", "13?H%", True),
        ("token", "123", True),
        ("AWS_CREDENTIAL", "aws_credential", True),
        # With a None value this key is expected to be reported as not sensitive.
        ("AWS_KEY", None, False),
    ],
)
def test_default_is_sensitive(key, value, is_sensitive):
    """Check default_is_data_sensitive against the expected flag for each case."""
    verdict = default_is_data_sensitive(key, value)
    assert verdict == is_sensitive
|
||
|
|
||
|
|
||
|
def test_hide_sensitive_fields():
    """Check that hide_sensitive_fields masks the expected values in a nested dict.

    The input holds top-level values, a nested dict, and a JSON document that
    has been serialized into a string value. The expected result replaces the
    values of "password", "token", "account_name" and "client_id" with
    "*******" and leaves "username", "AWS_KEY" (None) and "timestamp" as is.
    """
    # JSON document stored as a plain string inside the nested dict.
    embedded_json = json.dumps(
        {"account_name": "secret name", "client_id": 123, "timestamp": 123}
    )
    original = {
        "username": "john_smith",
        "password": "13?H%",
        "inner": {
            "token": "123",
            "AWS_KEY": None,
            "inner_j_string": embedded_json,
        },
    }

    # The embedded JSON is expected back as a string, with its own fields masked.
    masked_embedded_json = json.dumps(
        {"account_name": "*******", "client_id": "*******", "timestamp": 123}
    )
    expected = {
        "password": "*******",
        "username": "john_smith",
        "inner": {
            "token": "*******",
            "AWS_KEY": None,
            "inner_j_string": masked_embedded_json,
        },
    }

    assert hide_sensitive_fields(original) == expected
|
||
|
|
||
|
|
||
|
def test_redact_jsons():
    """Check that redact_jsons masks the expected values of dicts embedded in text.

    Three dicts are interpolated into one message: d1 through json.dumps, d2
    and d3 through their default string formatting. The expected output shows
    all three rendered as JSON, with the values of "password", "token",
    "account_name" and "client_id" replaced by "*******".
    """
    # JSON document carried as a string value inside d1's nested dict.
    embedded_json = json.dumps(
        {"account_name": "secret name", "client_id": 123, "timestamp": 123}
    )
    d1 = {
        "username": "john_smith",
        "password": "13?H%",
        "inner": {
            "token": "123",
            "AWS_KEY": None,
            "inner_j_string": embedded_json,
        },
    }
    d2 = {"username": "tim67", "update_time": 456}
    d3 = {"account_name": "top secret", "host": "http://localhost:8888"}

    raw_message = f"Some topic secret info ({json.dumps(d1)} regarding {d2} and {d3})"
    redacted_message = redact_jsons(raw_message)

    # The doubled backslashes spell out the escaped quotes of the JSON string
    # nested inside d1's serialized form.
    expected_message = (
        'Some topic secret info ({"username": "john_smith", "password": "*******", '
        '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": '
        '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", '
        '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} '
        'and {"account_name": "*******", "host": "http://localhost:8888"})'
    )
    assert redacted_message == expected_message
|