mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-14 04:25:54 +00:00

The Google Drive service account key can be a dict or a file path (str). We have been using the path successfully, but the dict can also end up being stored as a string that needs to be deserialized. That deserialization can have issues with single and double quotes.
131 lines
3.7 KiB
Python
131 lines
3.7 KiB
Python
import json
|
|
import typing as t
|
|
from dataclasses import dataclass, field
|
|
|
|
from unstructured.ingest.cli.utils import extract_config
|
|
from unstructured.ingest.interfaces import BaseConfig
|
|
from unstructured.ingest.utils.string_utils import json_to_dict
|
|
|
|
|
|
@dataclass
class A(BaseConfig):
    """Innermost test config: holds a single string field ``a``."""

    a: str
|
|
|
|
|
|
@dataclass
class B(BaseConfig):
    """Mid-level test config: nests an ``A`` config next to an integer field ``b``."""

    a: A
    b: int
|
|
|
|
|
|
# Flat key/value input shared by the extract_config tests that do not define their own.
flat_data = dict(a="test", b=4, c=True)
|
|
|
|
|
|
def test_extract_config_concrete():
    """Check that required nested dataclass fields are filled in from the flat data."""

    @dataclass
    class C(BaseConfig):
        b: B
        c: bool

    config = extract_config(flat_data=flat_data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps(
        {"b": {"a": {"a": "test"}, "b": 4}, "c": True},
        sort_keys=True,
    )
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_optional():
    """Check that an ``Optional`` nested dataclass field is still populated from the flat data."""

    @dataclass
    class C(BaseConfig):
        c: bool
        b: t.Optional[B] = None

    config = extract_config(flat_data=flat_data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps(
        {"b": {"a": {"a": "test"}, "b": 4}, "c": True},
        sort_keys=True,
    )
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_union():
    """Check that an ``Optional[Union[B, int]]`` field ends up with the plain integer value."""

    @dataclass
    class C(BaseConfig):
        c: bool
        b: t.Optional[t.Union[B, int]] = None

    config = extract_config(flat_data=flat_data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps({"b": 4, "c": True}, sort_keys=True)
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_list():
    """Check that a ``List[int]`` field is carried over alongside a nested dataclass field."""

    @dataclass
    class C(BaseConfig):
        c: t.List[int]
        b: B

    # Local input: like the shared flat data, but "c" is a list of ints.
    data = {"a": "test", "b": 4, "c": [1, 2, 3]}
    config = extract_config(flat_data=data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps(
        {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]},
        sort_keys=True,
    )
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_optional_list():
    """Check that an ``Optional[List[int]]`` field is populated when the flat data provides it."""

    @dataclass
    class C(BaseConfig):
        b: B
        c: t.Optional[t.List[int]] = None

    # Local input: like the shared flat data, but "c" is a list of ints.
    data = {"a": "test", "b": 4, "c": [1, 2, 3]}
    config = extract_config(flat_data=data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps(
        {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]},
        sort_keys=True,
    )
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_dataclass_list():
    """Check that a ``List[B]`` field with a list default factory serializes as an empty list."""

    @dataclass
    class C(BaseConfig):
        c: bool
        b: t.List[B] = field(default_factory=list)

    # Local input with no "b" key.
    data = {"a": "test", "c": True}
    config = extract_config(flat_data=data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps({"b": [], "c": True}, sort_keys=True)
    assert actual == expected
|
|
|
|
|
|
def test_extract_config_dict():
    """Check that a ``Dict[str, B]`` field with a dict default factory serializes as an empty dict."""

    @dataclass
    class C(BaseConfig):
        c: bool
        b: t.Dict[str, B] = field(default_factory=dict)

    # Local input that only supplies the boolean field.
    data = {"c": True}
    config = extract_config(flat_data=data, config=C)
    actual = config.to_json(sort_keys=True)
    expected = json.dumps({"c": True, "b": {}}, sort_keys=True)
    assert actual == expected
|
|
|
|
|
|
def test_json_to_dict_valid_json():
    """Check that a well-formed JSON object string is returned as the matching dict."""
    raw = '{"key": "value"}'
    assert json_to_dict(raw) == {"key": "value"}
    assert isinstance(json_to_dict(raw), dict)
|
|
|
|
|
|
def test_json_to_dict_malformed_json():
    """Check that a string missing its closing brace is returned unchanged, still as a str."""
    raw = '{"key": "value"'
    assert json_to_dict(raw) == '{"key": "value"'
    assert isinstance(json_to_dict(raw), str)
|
|
|
|
|
|
def test_json_to_dict_single_quotes():
    """Check that a single-quoted, dict-like string is still returned as the matching dict."""
    raw = "{'key': 'value'}"
    assert json_to_dict(raw) == {"key": "value"}
    assert isinstance(json_to_dict(raw), dict)
|
|
|
|
|
|
def test_json_to_dict_path():
    """Check that a file-path string is returned unchanged, still as a str."""
    raw = "/path/to/file.json"
    assert json_to_dict(raw) == "/path/to/file.json"
    assert isinstance(json_to_dict(raw), str)
|