mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-29 07:59:27 +00:00
fix: Update the de/serialization with schema utils (#9526)
* Update the util methods * Update tests * fix tests * schema fix * Add json schema for tuples and sets * Add proper conversion for sets and tuples * Adjust typing * PR comments * Linting * Optimize deserialization * remove TODO * PR comments * PR comments * Update tests and deserialization error * Support legacy deserialization * Update deprecating warning * Update test
This commit is contained in:
parent
d14f5dca0e
commit
9ed0b9b0bc
@ -2,6 +2,7 @@
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import warnings
|
||||
from typing import Any, Dict
|
||||
|
||||
from haystack.core.errors import DeserializationError, SerializationError
|
||||
@ -54,10 +55,9 @@ def deserialize_class_instance(data: Dict[str, Any]) -> Any:
|
||||
return obj_class.from_dict(data["data"])
|
||||
|
||||
|
||||
# TODO: Make this function public once its implementation is finalized and tested
|
||||
def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _serialize_value_with_schema(payload: Any) -> Dict[str, Any]:
|
||||
"""
|
||||
Serializes a dictionary into a schema-aware format suitable for storage or transmission.
|
||||
Serializes a value into a schema-aware format suitable for storage or transmission.
|
||||
|
||||
The output format separates the schema information from the actual data, making it easier
|
||||
to deserialize complex nested structures correctly.
|
||||
@ -66,63 +66,69 @@ def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
- Objects with to_dict() methods (e.g. dataclasses)
|
||||
- Objects with __dict__ attributes
|
||||
- Dictionaries
|
||||
- Lists, tuples, and sets
|
||||
- Lists, tuples, and sets. Lists with mixed types are not supported.
|
||||
- Primitive types (str, int, float, bool, None)
|
||||
|
||||
:param value: The value to serialize
|
||||
:param payload: The value to serialize (can be any type)
|
||||
:returns: The serialized dict representation of the given value. Contains two keys:
|
||||
- "schema": Contains type information for each field
|
||||
- "data": Contains the actual data in a simplified format
|
||||
- "serialization_schema": Contains type information for each field.
|
||||
- "serialized_data": Contains the actual data in a simplified format.
|
||||
|
||||
"""
|
||||
schema: Dict[str, Any] = {}
|
||||
data: Dict[str, Any] = {}
|
||||
# Handle dictionary case - iterate through fields
|
||||
if isinstance(payload, dict):
|
||||
schema: Dict[str, Any] = {}
|
||||
data: Dict[str, Any] = {}
|
||||
|
||||
for field, val in payload.items():
|
||||
# 1) Handle dataclass‐style objects
|
||||
if hasattr(val, "to_dict") and callable(val.to_dict):
|
||||
type_name = generate_qualified_class_name(type(val))
|
||||
pure = _convert_to_basic_types(val.to_dict())
|
||||
schema[field] = {"type": type_name}
|
||||
data[field] = pure
|
||||
for field, val in payload.items():
|
||||
# Recursively serialize each field
|
||||
serialized_value = _serialize_value_with_schema(val)
|
||||
schema[field] = serialized_value["serialization_schema"]
|
||||
data[field] = serialized_value["serialized_data"]
|
||||
|
||||
# 2) Arbitrary objects w/ __dict__
|
||||
elif hasattr(val, "__dict__"):
|
||||
type_name = generate_qualified_class_name(type(val))
|
||||
pure = _convert_to_basic_types(vars(val))
|
||||
schema[field] = {"type": type_name}
|
||||
data[field] = pure
|
||||
return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data}
|
||||
|
||||
# 3) Dicts → "object"
|
||||
elif isinstance(val, dict):
|
||||
pure = _convert_to_basic_types(val)
|
||||
schema[field] = {"type": "object"}
|
||||
data[field] = pure
|
||||
# Handle array case - iterate through elements
|
||||
elif isinstance(payload, (list, tuple, set)):
|
||||
# Convert to list for consistent handling
|
||||
pure_list = _convert_to_basic_types(list(payload))
|
||||
|
||||
# 4) Sequences → "array"
|
||||
elif isinstance(val, (list, tuple, set)):
|
||||
# pure data
|
||||
pure_list = _convert_to_basic_types(list(val))
|
||||
# determine item type from first element (if any)
|
||||
if val:
|
||||
first = next(iter(val))
|
||||
if hasattr(first, "to_dict") and callable(first.to_dict) or hasattr(first, "__dict__"):
|
||||
item_type = generate_qualified_class_name(type(first))
|
||||
else:
|
||||
item_type = _primitive_schema_type(first)
|
||||
else:
|
||||
item_type = "any"
|
||||
|
||||
schema[field] = {"type": "array", "items": {"type": item_type}}
|
||||
data[field] = pure_list
|
||||
|
||||
# 5) Primitives
|
||||
# Determine item type from first element (if any)
|
||||
if payload:
|
||||
first = next(iter(payload))
|
||||
item_schema = _serialize_value_with_schema(first)
|
||||
base_schema = {"type": "array", "items": item_schema["serialization_schema"]}
|
||||
else:
|
||||
prim_type = _primitive_schema_type(val)
|
||||
schema[field] = {"type": prim_type}
|
||||
data[field] = val
|
||||
base_schema = {"type": "array", "items": {}}
|
||||
|
||||
return {"serialization_schema": schema, "serialized_data": data}
|
||||
# Add JSON Schema properties to infer sets and tuples
|
||||
if isinstance(payload, set):
|
||||
base_schema["uniqueItems"] = True
|
||||
elif isinstance(payload, tuple):
|
||||
base_schema["minItems"] = len(payload)
|
||||
base_schema["maxItems"] = len(payload)
|
||||
|
||||
return {"serialization_schema": base_schema, "serialized_data": pure_list}
|
||||
|
||||
# Handle Haystack style objects (e.g. dataclasses and Components)
|
||||
elif hasattr(payload, "to_dict") and callable(payload.to_dict):
|
||||
type_name = generate_qualified_class_name(type(payload))
|
||||
pure = _convert_to_basic_types(payload)
|
||||
schema = {"type": type_name}
|
||||
return {"serialization_schema": schema, "serialized_data": pure}
|
||||
|
||||
# Handle arbitrary objects with __dict__
|
||||
elif hasattr(payload, "__dict__"):
|
||||
type_name = generate_qualified_class_name(type(payload))
|
||||
pure = _convert_to_basic_types(vars(payload))
|
||||
schema = {"type": type_name}
|
||||
return {"serialization_schema": schema, "serialized_data": pure}
|
||||
|
||||
# Handle primitives
|
||||
else:
|
||||
prim_type = _primitive_schema_type(payload)
|
||||
schema = {"type": prim_type}
|
||||
return {"serialization_schema": schema, "serialized_data": payload}
|
||||
|
||||
|
||||
def _primitive_schema_type(value: Any) -> str:
|
||||
@ -172,69 +178,103 @@ def _convert_to_basic_types(value: Any) -> Any:
|
||||
|
||||
# sequences
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
cls = type(value)
|
||||
return cls(_convert_to_basic_types(v) for v in value)
|
||||
return [_convert_to_basic_types(v) for v in value]
|
||||
|
||||
# primitive
|
||||
return value
|
||||
|
||||
|
||||
# TODO: Make this function public once its implementation is finalized and tested
|
||||
def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Any: # pylint: disable=too-many-return-statements, # noqa: PLR0911, PLR0912
|
||||
"""
|
||||
Deserializes a dictionary with schema information and data to original values.
|
||||
Deserializes a value with schema information back to its original form.
|
||||
|
||||
Takes a dict of the form:
|
||||
{
|
||||
"schema": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
},
|
||||
"data": {
|
||||
"numbers": 1,
|
||||
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
|
||||
"serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
|
||||
"serialized_data": <the actual data>
|
||||
}
|
||||
|
||||
:param serialized: The serialized dict with schema and data.
|
||||
:returns: The deserialized dict with original values.
|
||||
:returns: The deserialized value in its original form.
|
||||
"""
|
||||
schema = serialized.get("serialization_schema", {})
|
||||
data = serialized.get("serialized_data", {})
|
||||
|
||||
result: Dict[str, Any] = {}
|
||||
for field, raw in data.items():
|
||||
info = schema.get(field)
|
||||
# no schema entry → just deep-deserialize whatever we have
|
||||
if not info:
|
||||
result[field] = _deserialize_value(raw)
|
||||
continue
|
||||
if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized:
|
||||
raise DeserializationError(
|
||||
f"Invalid format of passed serialized payload. Expected a dictionary with keys "
|
||||
f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
|
||||
)
|
||||
schema = serialized["serialization_schema"]
|
||||
data = serialized["serialized_data"]
|
||||
|
||||
t = info["type"]
|
||||
schema_type = schema.get("type")
|
||||
|
||||
# ARRAY case
|
||||
if t == "array":
|
||||
item_type = info["items"]["type"]
|
||||
reconstructed = []
|
||||
for item in raw:
|
||||
envelope = {"type": item_type, "data": item}
|
||||
reconstructed.append(_deserialize_value(envelope))
|
||||
result[field] = reconstructed
|
||||
if not schema_type:
|
||||
# for backward comaptability till Haystack 2.16 we use legacy implementation
|
||||
warnings.warn(
|
||||
"Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
|
||||
"State object created with a version of Haystack older than 2.15.0. "
|
||||
"Support for the old serialization format will be removed in Haystack 2.16.0. "
|
||||
"Please upgrade to the new serialization format to ensure forward compatibility.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
return _deserialize_value_with_schema_legacy(serialized)
|
||||
|
||||
# PRIMITIVE case
|
||||
elif t in ("null", "boolean", "integer", "number", "string"):
|
||||
result[field] = raw
|
||||
# Handle object case (dictionary with properties)
|
||||
if schema_type == "object":
|
||||
properties = schema.get("properties")
|
||||
if properties:
|
||||
result: Dict[str, Any] = {}
|
||||
|
||||
# GENERIC OBJECT
|
||||
elif t == "object":
|
||||
envelope = {"type": "object", "data": raw}
|
||||
result[field] = _deserialize_value(envelope)
|
||||
if isinstance(data, dict):
|
||||
for field, raw_value in data.items():
|
||||
field_schema = properties.get(field)
|
||||
if field_schema:
|
||||
# Recursively deserialize each field - avoid creating temporary dict
|
||||
result[field] = _deserialize_value_with_schema(
|
||||
{"serialization_schema": field_schema, "serialized_data": raw_value}
|
||||
)
|
||||
|
||||
# CUSTOM CLASS
|
||||
return result
|
||||
else:
|
||||
envelope = {"type": t, "data": raw}
|
||||
result[field] = _deserialize_value(envelope)
|
||||
return _deserialize_value(data)
|
||||
|
||||
return result
|
||||
# Handle array case
|
||||
elif schema_type == "array":
|
||||
# Cache frequently accessed schema properties
|
||||
item_schema = schema.get("items", {})
|
||||
item_type = item_schema.get("type", "any")
|
||||
is_set = schema.get("uniqueItems") is True
|
||||
is_tuple = schema.get("minItems") is not None and schema.get("maxItems") is not None
|
||||
|
||||
# Handle nested objects/arrays first (most complex case)
|
||||
if item_type in ("object", "array"):
|
||||
return [
|
||||
_deserialize_value_with_schema({"serialization_schema": item_schema, "serialized_data": item})
|
||||
for item in data
|
||||
]
|
||||
|
||||
# Helper function to deserialize individual items
|
||||
def deserialize_item(item):
|
||||
if item_type == "any":
|
||||
return _deserialize_value(item)
|
||||
else:
|
||||
return _deserialize_value({"type": item_type, "data": item})
|
||||
|
||||
# Handle different collection types
|
||||
if is_set:
|
||||
return {deserialize_item(item) for item in data}
|
||||
elif is_tuple:
|
||||
return tuple(deserialize_item(item) for item in data)
|
||||
else:
|
||||
return [deserialize_item(item) for item in data]
|
||||
|
||||
# Handle primitive types
|
||||
elif schema_type in ("null", "boolean", "integer", "number", "string"):
|
||||
return data
|
||||
|
||||
# Handle custom class types
|
||||
else:
|
||||
return _deserialize_value({"type": schema_type, "data": data})
|
||||
|
||||
|
||||
def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-statements # noqa: PLR0911
|
||||
@ -291,3 +331,61 @@ def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-st
|
||||
|
||||
# 4) Fallback (shouldn't usually happen with our schema)
|
||||
return value
|
||||
|
||||
|
||||
def _deserialize_value_with_schema_legacy(serialized: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Legacy function for deserializing a dictionary with schema information and data to original values.
|
||||
|
||||
Kept for backward compatibility till Haystack 2.16.0.
|
||||
Takes a dict of the form:
|
||||
{
|
||||
"schema": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
},
|
||||
"data": {
|
||||
"numbers": 1,
|
||||
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
|
||||
}
|
||||
|
||||
:param serialized: The serialized dict with schema and data.
|
||||
:returns: The deserialized dict with original values.
|
||||
"""
|
||||
schema = serialized.get("serialization_schema", {})
|
||||
data = serialized.get("serialized_data", {})
|
||||
|
||||
result: Dict[str, Any] = {}
|
||||
for field, raw in data.items():
|
||||
info = schema.get(field)
|
||||
# no schema entry → just deep-deserialize whatever we have
|
||||
if not info:
|
||||
result[field] = _deserialize_value(raw)
|
||||
continue
|
||||
|
||||
t = info["type"]
|
||||
|
||||
# ARRAY case
|
||||
if t == "array":
|
||||
item_type = info["items"]["type"]
|
||||
reconstructed = []
|
||||
for item in raw:
|
||||
envelope = {"type": item_type, "data": item}
|
||||
reconstructed.append(_deserialize_value(envelope))
|
||||
result[field] = reconstructed
|
||||
|
||||
# PRIMITIVE case
|
||||
elif t in ("null", "boolean", "integer", "number", "string"):
|
||||
result[field] = raw
|
||||
|
||||
# GENERIC OBJECT
|
||||
elif t == "object":
|
||||
envelope = {"type": "object", "data": raw}
|
||||
result[field] = _deserialize_value(envelope)
|
||||
|
||||
# CUSTOM CLASS
|
||||
else:
|
||||
envelope = {"type": t, "data": raw}
|
||||
result[field] = _deserialize_value(envelope)
|
||||
|
||||
return result
|
||||
|
||||
@ -368,9 +368,15 @@ class TestState:
|
||||
}
|
||||
assert state_dict["data"] == {
|
||||
"serialization_schema": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"dict_of_lists": {"type": "object"},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"dict_of_lists": {
|
||||
"type": "object",
|
||||
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
|
||||
},
|
||||
},
|
||||
},
|
||||
"serialized_data": {
|
||||
"numbers": 1,
|
||||
@ -380,6 +386,57 @@ class TestState:
|
||||
}
|
||||
|
||||
def test_state_from_dict(self):
    """State.from_dict restores schema types, handlers and data from the schema-aware serialization format."""
    state_dict = {
        "schema": {
            "numbers": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"},
            "messages": {
                "type": "typing.List[haystack.dataclasses.chat_message.ChatMessage]",
                "handler": "haystack.components.agents.state.state_utils.merge_lists",
            },
            "dict_of_lists": {
                "type": "dict",
                "handler": "haystack.components.agents.state.state_utils.replace_values",
            },
        },
        "data": {
            "serialization_schema": {
                "type": "object",
                "properties": {
                    "numbers": {"type": "integer"},
                    "messages": {
                        "type": "array",
                        "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"},
                    },
                    "dict_of_lists": {
                        "type": "object",
                        "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
                    },
                },
            },
            "serialized_data": {
                "numbers": 1,
                "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
                "dict_of_lists": {"numbers": [1, 2, 3]},
            },
        },
    }
    state = State.from_dict(state_dict)
    # Check types are correctly converted (identity is the right comparison for type objects)
    assert state.schema["numbers"]["type"] is int
    assert state.schema["dict_of_lists"]["type"] is dict
    # Check handlers are functions, not comparing exact functions as they might be different references
    assert callable(state.schema["numbers"]["handler"])
    assert callable(state.schema["messages"]["handler"])
    assert callable(state.schema["dict_of_lists"]["handler"])
    # Check data is correct
    assert state.data["numbers"] == 1
    assert state.data["messages"] == [ChatMessage.from_user(text="Hello, world!")]
    assert state.data["dict_of_lists"] == {"numbers": [1, 2, 3]}
|
||||
|
||||
def test_state_from_dict_legacy(self):
|
||||
# this is the old format of the state dictionary
|
||||
# it is kept for backward compatibility
|
||||
# it will be removed in Haystack 2.16.0
|
||||
state_dict = {
|
||||
"schema": {
|
||||
"numbers": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"},
|
||||
|
||||
@ -74,10 +74,233 @@ def test_deserialize_class_instance_invalid_data():
|
||||
deserialize_class_instance({"type": "test_base_serialization.CustomClassNoFromDict", "data": {}})
|
||||
|
||||
|
||||
def test_serialize_value_primitive_types():
    """Each primitive serializes to its schema type name with the value passed through as the data."""
    # (value, expected schema type); avoids local names such as `bool` that would shadow builtins
    cases = [(1, "integer"), ("test", "string"), (True, "boolean"), (None, "null")]
    for value, schema_type in cases:
        result = _serialize_value_with_schema(value)
        assert result == {"serialization_schema": {"type": schema_type}, "serialized_data": value}
|
||||
|
||||
|
||||
def test_deserialize_value_primitive_types():
    """Primitive-typed payloads deserialize to the stored value unchanged."""
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "integer"}, "serialized_data": 1})
    assert result == 1
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "string"}, "serialized_data": "test"})
    assert result == "test"
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "boolean"}, "serialized_data": True})
    # Identity checks: `== True` would also accept 1, and `== None` is discouraged by PEP 8
    assert result is True
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "null"}, "serialized_data": None})
    assert result is None
|
||||
|
||||
|
||||
def test_serialize_value_with_sequences():
    """Lists, sets and tuples serialize to an "array" schema; sets and tuples carry extra marker keywords."""
    int_array = {"type": "array", "items": {"type": "integer"}}
    cases = [
        ([1, 2, 3], int_array),
        ({1, 2, 3}, {**int_array, "uniqueItems": True}),
        ((1, 2, 3), {**int_array, "minItems": 3, "maxItems": 3}),
    ]
    for payload, expected_schema in cases:
        result = _serialize_value_with_schema(payload)
        assert result == {"serialization_schema": expected_schema, "serialized_data": [1, 2, 3]}
|
||||
|
||||
|
||||
def test_deserialize_value_with_sequences():
    """Array payloads are rebuilt as list, set or tuple depending on the schema keywords."""
    sequences = [1, 2, 3]
    set_sequences = {1, 2, 3}
    tuple_sequences = (1, 2, 3)
    result = _deserialize_value_with_schema(
        {"serialization_schema": {"type": "array", "items": {"type": "integer"}}, "serialized_data": [1, 2, 3]}
    )
    assert result == sequences
    result = _deserialize_value_with_schema(
        {
            "serialization_schema": {"type": "array", "items": {"type": "integer"}, "uniqueItems": True},
            "serialized_data": [1, 2, 3],
        }
    )
    assert result == set_sequences
    # The schema mirrors what the serializer emits for tuples: only minItems/maxItems mark a tuple.
    result = _deserialize_value_with_schema(
        {
            "serialization_schema": {"type": "array", "items": {"type": "integer"}, "minItems": 3, "maxItems": 3},
            "serialized_data": [1, 2, 3],
        }
    )
    assert result == tuple_sequences
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_nested_lists():
    """A list of integer lists serializes to a nested "array" schema and round-trips unchanged."""
    original = [[1, 2], [3, 4]]
    inner_schema = {"type": "array", "items": {"type": "integer"}}
    expected = {
        "serialization_schema": {"type": "array", "items": inner_schema},
        "serialized_data": [[1, 2], [3, 4]],
    }

    serialized = _serialize_value_with_schema(original)
    assert serialized == expected

    assert _deserialize_value_with_schema(serialized) == original
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_nested_answer_lists():
    """Test that _deserialize_value_with_schema handles nested lists"""
    # (answer text, query, document content, expected document id)
    fixtures = [
        (
            "Paris",
            "What is the capital of France?",
            "Paris is the capital of France",
            "413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651",
        ),
        (
            "Berlin",
            "What is the capital of Germany?",
            "Berlin is the capital of Germany",
            "c7b5b839963fcbf9b394b24c883731e840c3170ace33afb7af87a2de8a257f6f",
        ),
    ]

    # One single-answer inner list per fixture
    nested_answers_list = [
        [GeneratedAnswer(data=answer, query=query, documents=[Document(content=content)], meta={"page": 1})]
        for answer, query, content, _ in fixtures
    ]

    expected_data = [
        [
            {
                "type": "haystack.dataclasses.answer.GeneratedAnswer",
                "init_parameters": {
                    "data": answer,
                    "query": query,
                    "documents": [
                        {
                            "id": doc_id,
                            "content": content,
                            "blob": None,
                            "meta": {},
                            "score": None,
                            "embedding": None,
                            "sparse_embedding": None,
                        }
                    ],
                    "meta": {"page": 1},
                },
            }
        ]
        for answer, query, content, doc_id in fixtures
    ]
    expected_schema = {
        "type": "array",
        "items": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
    }

    serialized = _serialize_value_with_schema(nested_answers_list)
    assert serialized == {"serialization_schema": expected_schema, "serialized_data": expected_data}

    assert _deserialize_value_with_schema(serialized) == nested_answers_list
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_nested_dicts():
    """Nested dicts serialize to nested "object" schemas with per-key properties and round-trip unchanged."""
    original = {"key1": {"nested1": "value1", "nested2": {"deep": "value2"}}}

    # Build the expected schema from the innermost dict outwards
    deep_schema = {"type": "object", "properties": {"deep": {"type": "string"}}}
    key1_schema = {"type": "object", "properties": {"nested1": {"type": "string"}, "nested2": deep_schema}}
    expected = {
        "serialization_schema": {"type": "object", "properties": {"key1": key1_schema}},
        "serialized_data": {"key1": {"nested1": "value1", "nested2": {"deep": "value2"}}},
    }

    serialized = _serialize_value_with_schema(original)
    assert serialized == expected

    assert _deserialize_value_with_schema(serialized) == original
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_nested_sets():
    """A list of integer sets serializes with "uniqueItems" on the inner arrays and deserializes back to sets."""
    original = [{1, 2}, {3, 4}]
    expected_schema = {
        "items": {"items": {"type": "integer"}, "type": "array", "uniqueItems": True},
        "type": "array",
    }

    serialized = _serialize_value_with_schema(original)
    assert serialized == {"serialization_schema": expected_schema, "serialized_data": [[1, 2], [3, 4]]}

    # Deserialize from a hand-written payload rather than from the serializer output
    handwritten = {"serialization_schema": expected_schema, "serialized_data": [[1, 2], [3, 4]]}
    assert _deserialize_value_with_schema(handwritten) == original
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_empty_structures():
    """Test that _deserialize_value_with_schema handles empty structures"""
    original = {"empty_list": [], "empty_dict": {}, "nested_empty": {"empty": []}}

    round_tripped = _deserialize_value_with_schema(_serialize_value_with_schema(original))

    assert round_tripped == original
|
||||
|
||||
|
||||
def test_serialize_value_with_schema():
|
||||
data = {
|
||||
"numbers": 1,
|
||||
"messages": [ChatMessage.from_user(text="Hello, world!")],
|
||||
"messages": [ChatMessage.from_user(text="Hello, world!"), ChatMessage.from_assistant(text="Hello, world!")],
|
||||
"user_id": "123",
|
||||
"dict_of_lists": {"numbers": [1, 2, 3]},
|
||||
"documents": [Document(content="Hello, world!")],
|
||||
@ -94,17 +317,32 @@ def test_serialize_value_with_schema():
|
||||
result = _serialize_value_with_schema(data)
|
||||
assert result == {
|
||||
"serialization_schema": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"user_id": {"type": "string"},
|
||||
"dict_of_lists": {"type": "object"},
|
||||
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
|
||||
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
|
||||
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"user_id": {"type": "string"},
|
||||
"dict_of_lists": {
|
||||
"type": "object",
|
||||
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
|
||||
},
|
||||
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
|
||||
"list_of_dicts": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
|
||||
},
|
||||
},
|
||||
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
|
||||
},
|
||||
},
|
||||
"serialized_data": {
|
||||
"numbers": 1,
|
||||
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
|
||||
"messages": [
|
||||
{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
|
||||
{"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
|
||||
],
|
||||
"user_id": "123",
|
||||
"dict_of_lists": {"numbers": [1, 2, 3]},
|
||||
"documents": [
|
||||
@ -146,17 +384,26 @@ def test_serialize_value_with_schema():
|
||||
def test_deserialize_value_with_schema():
|
||||
serialized__data = {
|
||||
"serialization_schema": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"user_id": {"type": "string"},
|
||||
"dict_of_lists": {"type": "object"},
|
||||
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
|
||||
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
|
||||
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"numbers": {"type": "integer"},
|
||||
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
|
||||
"user_id": {"type": "string"},
|
||||
"dict_of_lists": {
|
||||
"type": "object",
|
||||
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
|
||||
},
|
||||
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
|
||||
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
|
||||
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
|
||||
},
|
||||
},
|
||||
"serialized_data": {
|
||||
"numbers": 1,
|
||||
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
|
||||
"messages": [
|
||||
{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
|
||||
{"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
|
||||
],
|
||||
"user_id": "123",
|
||||
"dict_of_lists": {"numbers": [1, 2, 3]},
|
||||
"documents": [
|
||||
@ -205,27 +452,21 @@ def test_deserialize_value_with_schema():
|
||||
assert isinstance(result["answers"][0], GeneratedAnswer)
|
||||
|
||||
|
||||
def test_serializing_and_deserializing_custom_class_type():
    """A dict holding a CustomClass instance serializes with the class's qualified name and round-trips."""
    payload = {"numbers": 1, "custom_type": CustomClass()}
    expected = {
        "serialization_schema": {
            "type": "object",
            "properties": {
                "numbers": {"type": "integer"},
                "custom_type": {"type": "test_base_serialization.CustomClass"},
            },
        },
        "serialized_data": {"numbers": 1, "custom_type": {"key": "value", "more": False}},
    }

    serialized = _serialize_value_with_schema(payload)
    assert serialized == expected

    restored = _deserialize_value_with_schema(serialized)
    assert restored["numbers"] == 1
    assert isinstance(restored["custom_type"], CustomClass)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user