79 lines
2.3 KiB
Python
Raw Normal View History

import json
import pytest
from unstructured.ingest.logger import (
default_is_data_sensitive,
hide_sensitive_fields,
redact_jsons,
)
@pytest.mark.parametrize(
    ("key", "value", "is_sensitive"),
    [
        ("username", "john_smith", False),
        ("password", "13?H%", True),
        ("token", "123", True),
        ("AWS_CREDENTIAL", "aws_credential", True),
        # A None value paired with the AWS_KEY key is expected to be reported as non-sensitive.
        ("AWS_KEY", None, False),
    ],
)
def test_default_is_sensitive(key, value, is_sensitive):
    """Check default_is_data_sensitive(key, value) against the expected flag for each case."""
    verdict = default_is_data_sensitive(key, value)
    assert verdict == is_sensitive
def test_hide_sensitive_fields():
    """Check the dict returned by hide_sensitive_fields for a nested input.

    Per the expected value below, "password", "token", and the "account_name" and
    "client_id" entries inside the JSON-encoded "inner_j_string" are replaced with the
    mask, while "username", "AWS_KEY" (None) and "timestamp" are left as they were.
    """
    mask = "*******"

    # The nested dict carries a JSON-encoded string, so build both variants up front.
    raw_inner_json = json.dumps(
        {"account_name": "secret name", "client_id": 123, "timestamp": 123}
    )
    masked_inner_json = json.dumps(
        {"account_name": mask, "client_id": mask, "timestamp": 123}
    )

    payload = {
        "username": "john_smith",
        "password": "13?H%",
        "inner": {"token": "123", "AWS_KEY": None, "inner_j_string": raw_inner_json},
    }
    expected = {
        "password": mask,
        "username": "john_smith",
        "inner": {"token": mask, "AWS_KEY": None, "inner_j_string": masked_inner_json},
    }

    assert hide_sensitive_fields(payload) == expected
def test_redact_jsons():
    """Check redact_jsons on a message that embeds three dicts.

    The first dict is embedded as json.dumps() text; the other two are interpolated
    directly by the f-string. Per the expected value below, all three come back rendered
    as JSON, with "password", "token", "account_name" and "client_id" values replaced by
    "*******" and the remaining values unchanged.
    """
    nested_json = json.dumps(
        {"account_name": "secret name", "client_id": 123, "timestamp": 123}
    )
    creds = {
        "username": "john_smith",
        "password": "13?H%",
        "inner": {"token": "123", "AWS_KEY": None, "inner_j_string": nested_json},
    }
    activity = {"username": "tim67", "update_time": 456}
    conn = {"account_name": "top secret", "host": "http://localhost:8888"}

    creds_json = json.dumps(creds)
    message = f"Some topic secret info ({creds_json} regarding {activity} and {conn})"

    expected = (
        'Some topic secret info ({"username": "john_smith", "password": "*******", '
        '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": '
        '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", '
        '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} '
        'and {"account_name": "*******", "host": "http://localhost:8888"})'
    )

    assert redact_jsons(message) == expected