David Potter 43250d5576
bug CORE-3971: fix deserialization in google-drive source connector key path (#2586)
Google Drive Service account key can be a dict or a file path(str)

We have successfully been using the path. But the dict can also end up
being stored as a string that needs to be deserialized. The
deserialization can have issues with single and double quotes.
2024-03-03 15:30:35 +00:00

131 lines
3.7 KiB
Python

import json
import typing as t
from dataclasses import dataclass, field
from unstructured.ingest.cli.utils import extract_config
from unstructured.ingest.interfaces import BaseConfig
from unstructured.ingest.utils.string_utils import json_to_dict
@dataclass
class A(BaseConfig):
a: str
@dataclass
class B(BaseConfig):
a: A
b: int
flat_data = {"a": "test", "b": 4, "c": True}
def test_extract_config_concrete():
@dataclass
class C(BaseConfig):
b: B
c: bool
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_optional():
@dataclass
class C(BaseConfig):
c: bool
b: t.Optional[B] = None
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_union():
@dataclass
class C(BaseConfig):
c: bool
b: t.Optional[t.Union[B, int]] = None
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": 4, "c": True}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_list():
@dataclass
class C(BaseConfig):
c: t.List[int]
b: B
flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]}
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_optional_list():
@dataclass
class C(BaseConfig):
b: B
c: t.Optional[t.List[int]] = None
flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]}
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_dataclass_list():
@dataclass
class C(BaseConfig):
c: bool
b: t.List[B] = field(default_factory=list)
flat_data = {"a": "test", "c": True}
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"b": [], "c": True}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_extract_config_dict():
@dataclass
class C(BaseConfig):
c: bool
b: t.Dict[str, B] = field(default_factory=dict)
flat_data = {"c": True}
c = extract_config(flat_data=flat_data, config=C)
expected_result = {"c": True, "b": {}}
assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True)
def test_json_to_dict_valid_json():
json_string = '{"key": "value"}'
expected_result = {"key": "value"}
assert json_to_dict(json_string) == expected_result
assert isinstance(json_to_dict(json_string), dict)
def test_json_to_dict_malformed_json():
json_string = '{"key": "value"'
expected_result = '{"key": "value"'
assert json_to_dict(json_string) == expected_result
assert isinstance(json_to_dict(json_string), str)
def test_json_to_dict_single_quotes():
json_string = "{'key': 'value'}"
expected_result = {"key": "value"}
assert json_to_dict(json_string) == expected_result
assert isinstance(json_to_dict(json_string), dict)
def test_json_to_dict_path():
json_string = "/path/to/file.json"
expected_result = "/path/to/file.json"
assert json_to_dict(json_string) == expected_result
assert isinstance(json_to_dict(json_string), str)