mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-08-24 16:50:18 +00:00
fix: update flatten dict to support flattening tuples (#2423)
This PR updates the flatten_dict function to support flattening tuples. This is necessary for objects like Coordinates: when the object has not been written to disk, it has not been converted to a list before being flattened, so it is still a tuple.
This commit is contained in:
parent
fa9f6ccc17
commit
a9ad8ac8d1
@ -27,6 +27,7 @@
|
||||
* **Fix the serialization of the Elasticsearch destination connector.** Presence of the _client object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization.
|
||||
* **Fix the serialization of the Postgres destination connector.** Presence of the _client object breaks serialization due to TypeError: cannot pickle '_thread.lock' object. This removes that object before serialization.
|
||||
* **Fix documentation and sample code for Chroma.** The documentation was pointing to the wrong examples.
|
||||
* **Fix flatten_dict to be able to flatten tuples inside dicts.** Update the flatten_dict function to support flattening tuples inside dicts. This is necessary for objects like Coordinates: when the object has not been written to disk, it has not been converted to a list before being flattened, so it is still a tuple.
|
||||
|
||||
## 0.12.0
|
||||
|
||||
|
@ -409,6 +409,13 @@ def test_flatten_nested_dict():
|
||||
assert base.flatten_dict(dictionary) == expected_result
|
||||
|
||||
|
||||
def test_flatten_dict_with_tuples():
    """Tuple values are kept intact when flatten_lists is not requested.

    Nested dict keys are still joined with the default separator, but the
    tuples themselves must appear unchanged in the flattened result.
    """
    nested = {"a": 1, "b": (2, 3, 4), "c": {"d": 5, "e": (6, 7)}}
    flattened = base.flatten_dict(nested)
    assert flattened == {"a": 1, "b": (2, 3, 4), "c_d": 5, "c_e": (6, 7)}
|
||||
|
||||
|
||||
def test_flatten_dict_with_lists():
|
||||
"""Flattening a dictionary with lists"""
|
||||
dictionary = {"a": 1, "b": [2, 3, 4], "c": {"d": 5, "e": [6, 7]}}
|
||||
@ -432,6 +439,13 @@ def test_flatten_dict_alt_separator():
|
||||
assert base.flatten_dict(dictionary, separator=separator) == expected_result
|
||||
|
||||
|
||||
def test_flatten_dict_flatten_tuple():
    """With flatten_lists=True, tuples are expanded into index-suffixed keys.

    Each tuple element becomes its own entry, keyed by the parent key plus
    the element's position, including tuples nested inside inner dicts.
    """
    nested = {"a": 1, "b": (2, 3, 4), "c": {"d": 5, "e": (6, 7)}}
    flattened = base.flatten_dict(nested, flatten_lists=True)
    assert flattened == {
        "a": 1,
        "b_0": 2,
        "b_1": 3,
        "b_2": 4,
        "c_d": 5,
        "c_e_0": 6,
        "c_e_1": 7,
    }
|
||||
|
||||
|
||||
def test_flatten_dict_flatten_list():
|
||||
"""Flattening a dictionary with flatten_lists set to True"""
|
||||
dictionary = {"a": 1, "b": [2, 3, 4], "c": {"d": 5, "e": [6, 7]}}
|
||||
|
@ -181,7 +181,7 @@ def flatten_dict(
|
||||
):
|
||||
"""Flattens a nested dictionary into a single level dictionary. keys_to_omit is a list of keys
|
||||
that don't get flattened. If omitting a nested key, format as {parent_key}{separator}{key}.
|
||||
If flatten_lists is True, then lists are flattened as well."""
|
||||
If flatten_lists is True, then lists and tuples are flattened as well."""
|
||||
keys_to_omit = keys_to_omit if keys_to_omit else []
|
||||
flattened_dict = {}
|
||||
for key, value in dictionary.items():
|
||||
@ -192,7 +192,7 @@ def flatten_dict(
|
||||
flattened_dict.update(
|
||||
flatten_dict(value, new_key, separator, flatten_lists, keys_to_omit=keys_to_omit),
|
||||
)
|
||||
elif isinstance(value, list) and flatten_lists:
|
||||
elif isinstance(value, (list, tuple)) and flatten_lists:
|
||||
for index, item in enumerate(value):
|
||||
flattened_dict.update(
|
||||
flatten_dict(
|
||||
|
Loading…
x
Reference in New Issue
Block a user