fix: Update the de/serialization with schema utils (#9526)

* Update the util methods

* Update tests

* fix tests

* schema fix

* Add json schema for tuples and sets

* Add proper conversion for sets and tuples

* Adjust typing

* PR comments

* Linting

* Optimize deserialization

* remove TODO

* PR comments

* PR comments

* Update tests and deserialization error

* Support legacy deserialization

* Update deprecating warning

* Update test
This commit is contained in:
Amna Mubashar 2025-06-24 13:10:12 +02:00 committed by GitHub
parent d14f5dca0e
commit 9ed0b9b0bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 524 additions and 128 deletions

View File

@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0
import warnings
from typing import Any, Dict
from haystack.core.errors import DeserializationError, SerializationError
@ -54,10 +55,9 @@ def deserialize_class_instance(data: Dict[str, Any]) -> Any:
return obj_class.from_dict(data["data"])
# TODO: Make this function public once its implementation is finalized and tested
def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
def _serialize_value_with_schema(payload: Any) -> Dict[str, Any]:
"""
Serializes a dictionary into a schema-aware format suitable for storage or transmission.
Serializes a value into a schema-aware format suitable for storage or transmission.
The output format separates the schema information from the actual data, making it easier
to deserialize complex nested structures correctly.
@ -66,63 +66,69 @@ def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]:
- Objects with to_dict() methods (e.g. dataclasses)
- Objects with __dict__ attributes
- Dictionaries
- Lists, tuples, and sets
- Lists, tuples, and sets. Lists with mixed types are not supported.
- Primitive types (str, int, float, bool, None)
:param value: The value to serialize
:param payload: The value to serialize (can be any type)
:returns: The serialized dict representation of the given value. Contains two keys:
- "schema": Contains type information for each field
- "data": Contains the actual data in a simplified format
- "serialization_schema": Contains type information for each field.
- "serialized_data": Contains the actual data in a simplified format.
"""
schema: Dict[str, Any] = {}
data: Dict[str, Any] = {}
# Handle dictionary case - iterate through fields
if isinstance(payload, dict):
schema: Dict[str, Any] = {}
data: Dict[str, Any] = {}
for field, val in payload.items():
# 1) Handle dataclassstyle objects
if hasattr(val, "to_dict") and callable(val.to_dict):
type_name = generate_qualified_class_name(type(val))
pure = _convert_to_basic_types(val.to_dict())
schema[field] = {"type": type_name}
data[field] = pure
for field, val in payload.items():
# Recursively serialize each field
serialized_value = _serialize_value_with_schema(val)
schema[field] = serialized_value["serialization_schema"]
data[field] = serialized_value["serialized_data"]
# 2) Arbitrary objects w/ __dict__
elif hasattr(val, "__dict__"):
type_name = generate_qualified_class_name(type(val))
pure = _convert_to_basic_types(vars(val))
schema[field] = {"type": type_name}
data[field] = pure
return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data}
# 3) Dicts → "object"
elif isinstance(val, dict):
pure = _convert_to_basic_types(val)
schema[field] = {"type": "object"}
data[field] = pure
# Handle array case - iterate through elements
elif isinstance(payload, (list, tuple, set)):
# Convert to list for consistent handling
pure_list = _convert_to_basic_types(list(payload))
# 4) Sequences → "array"
elif isinstance(val, (list, tuple, set)):
# pure data
pure_list = _convert_to_basic_types(list(val))
# determine item type from first element (if any)
if val:
first = next(iter(val))
if hasattr(first, "to_dict") and callable(first.to_dict) or hasattr(first, "__dict__"):
item_type = generate_qualified_class_name(type(first))
else:
item_type = _primitive_schema_type(first)
else:
item_type = "any"
schema[field] = {"type": "array", "items": {"type": item_type}}
data[field] = pure_list
# 5) Primitives
# Determine item type from first element (if any)
if payload:
first = next(iter(payload))
item_schema = _serialize_value_with_schema(first)
base_schema = {"type": "array", "items": item_schema["serialization_schema"]}
else:
prim_type = _primitive_schema_type(val)
schema[field] = {"type": prim_type}
data[field] = val
base_schema = {"type": "array", "items": {}}
return {"serialization_schema": schema, "serialized_data": data}
# Add JSON Schema properties to infer sets and tuples
if isinstance(payload, set):
base_schema["uniqueItems"] = True
elif isinstance(payload, tuple):
base_schema["minItems"] = len(payload)
base_schema["maxItems"] = len(payload)
return {"serialization_schema": base_schema, "serialized_data": pure_list}
# Handle Haystack style objects (e.g. dataclasses and Components)
elif hasattr(payload, "to_dict") and callable(payload.to_dict):
type_name = generate_qualified_class_name(type(payload))
pure = _convert_to_basic_types(payload)
schema = {"type": type_name}
return {"serialization_schema": schema, "serialized_data": pure}
# Handle arbitrary objects with __dict__
elif hasattr(payload, "__dict__"):
type_name = generate_qualified_class_name(type(payload))
pure = _convert_to_basic_types(vars(payload))
schema = {"type": type_name}
return {"serialization_schema": schema, "serialized_data": pure}
# Handle primitives
else:
prim_type = _primitive_schema_type(payload)
schema = {"type": prim_type}
return {"serialization_schema": schema, "serialized_data": payload}
def _primitive_schema_type(value: Any) -> str:
@ -172,69 +178,103 @@ def _convert_to_basic_types(value: Any) -> Any:
# sequences
if isinstance(value, (list, tuple, set)):
cls = type(value)
return cls(_convert_to_basic_types(v) for v in value)
return [_convert_to_basic_types(v) for v in value]
# primitive
return value
# TODO: Make this function public once its implementation is finalized and tested
def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Any:  # pylint: disable=too-many-return-statements, # noqa: PLR0911, PLR0912
    """
    Deserializes a value with schema information back to its original form.

    Takes a dict of the form:
    {
        "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}},
        "serialized_data": <the actual data>
    }

    Arrays are rebuilt as a set when the schema has `"uniqueItems": True`, as a tuple when it has both
    "minItems" and "maxItems", and as a list otherwise. This also applies when the array items are themselves
    nested objects or arrays.

    Payloads whose schema has no string "type" entry are treated as the legacy format: a deprecation warning is
    emitted and the legacy deserializer is used.

    :param serialized: The serialized dict with schema and data.
    :returns: The deserialized value in its original form.
    :raises DeserializationError: If `serialized` is empty or lacks the "serialization_schema" or
        "serialized_data" key.
    """
    if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized:
        raise DeserializationError(
            f"Invalid format of passed serialized payload. Expected a dictionary with keys "
            f"'serialization_schema' and 'serialized_data'. Got: {serialized}"
        )
    schema = serialized["serialization_schema"]
    data = serialized["serialized_data"]

    schema_type = schema.get("type")

    # The legacy schema maps field names directly to per-field schemas, so "type" is either missing or, for a
    # field that is literally named "type", a nested dict rather than a type-name string.
    if not schema_type or not isinstance(schema_type, str):
        # for backward compatibility till Haystack 2.16 we use legacy implementation
        warnings.warn(
            "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized "
            "State object created with a version of Haystack older than 2.15.0. "
            "Support for the old serialization format will be removed in Haystack 2.16.0. "
            "Please upgrade to the new serialization format to ensure forward compatibility.",
            DeprecationWarning,
            stacklevel=2,
        )
        return _deserialize_value_with_schema_legacy(serialized)

    # Handle object case (dictionary with properties)
    if schema_type == "object":
        properties = schema.get("properties")
        if not properties:
            return _deserialize_value(data)

        result: Dict[str, Any] = {}
        if isinstance(data, dict):
            for field, raw_value in data.items():
                field_schema = properties.get(field)
                if field_schema:
                    # Recursively deserialize each field
                    result[field] = _deserialize_value_with_schema(
                        {"serialization_schema": field_schema, "serialized_data": raw_value}
                    )
        return result

    # Handle array case
    if schema_type == "array":
        item_schema = schema.get("items", {})
        item_type = item_schema.get("type", "any")
        nested = item_type in ("object", "array")

        if nested:
            # Nested objects/arrays carry their own schema and are deserialized recursively
            items = [
                _deserialize_value_with_schema({"serialization_schema": item_schema, "serialized_data": item})
                for item in data
            ]
        elif item_type == "any":
            items = [_deserialize_value(item) for item in data]
        else:
            items = [_deserialize_value({"type": item_type, "data": item}) for item in data]

        # Restore the original collection type from the JSON Schema hints written by the serializer
        if schema.get("uniqueItems") is True:
            try:
                return set(items)
            except TypeError:
                # Nested items that are not hashable (e.g. dicts or lists) cannot be stored in a set.
                # Such payloads used to be returned as a list, so keep doing that instead of failing.
                if not nested:
                    raise
                return items
        if schema.get("minItems") is not None and schema.get("maxItems") is not None:
            return tuple(items)
        return items

    # Handle primitive types
    if schema_type in ("null", "boolean", "integer", "number", "string"):
        return data

    # Handle custom class types
    return _deserialize_value({"type": schema_type, "data": data})
def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-statements # noqa: PLR0911
@ -291,3 +331,61 @@ def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-st
# 4) Fallback (shouldn't usually happen with our schema)
return value
def _deserialize_value_with_schema_legacy(serialized: Dict[str, Any]) -> Dict[str, Any]:
    """
    Deserializes a payload written in the legacy, flat per-field schema format.

    Kept for backward compatibility till Haystack 2.16.0.

    Takes a dict of the form:
    {
        "serialization_schema": {
            "numbers": {"type": "integer"},
            "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
        },
        "serialized_data": {
            "numbers": 1,
            "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
        },
    }

    :param serialized: The serialized dict with schema and data. Missing keys are treated as empty dicts.
    :returns: A dict with one deserialized value per field of the serialized data.
    """
    field_schemas = serialized.get("serialization_schema", {})
    raw_fields = serialized.get("serialized_data", {})
    primitive_types = ("null", "boolean", "integer", "number", "string")

    def restore(raw_value: Any, field_schema: Any) -> Any:
        # No schema entry for this field: deep-deserialize whatever is there
        if not field_schema:
            return _deserialize_value(raw_value)

        type_name = field_schema["type"]

        if type_name == "array":
            # Every item is wrapped in a type/data envelope using the declared item type
            item_type = field_schema["items"]["type"]
            return [_deserialize_value({"type": item_type, "data": item}) for item in raw_value]

        if type_name in primitive_types:
            return raw_value

        # Generic objects and custom classes are both handed over as a type/data envelope
        return _deserialize_value({"type": type_name, "data": raw_value})

    return {field: restore(raw_value, field_schemas.get(field)) for field, raw_value in raw_fields.items()}

View File

@ -368,9 +368,15 @@ class TestState:
}
assert state_dict["data"] == {
"serialization_schema": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"dict_of_lists": {"type": "object"},
"type": "object",
"properties": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"dict_of_lists": {
"type": "object",
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
},
},
},
"serialized_data": {
"numbers": 1,
@ -380,6 +386,57 @@ class TestState:
}
def test_state_from_dict(self):
    """State.from_dict restores types, handlers and data from the typed ("type"/"properties") schema format."""
    state_schema = {
        "numbers": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"},
        "messages": {
            "type": "typing.List[haystack.dataclasses.chat_message.ChatMessage]",
            "handler": "haystack.components.agents.state.state_utils.merge_lists",
        },
        "dict_of_lists": {
            "type": "dict",
            "handler": "haystack.components.agents.state.state_utils.replace_values",
        },
    }
    serialization_schema = {
        "type": "object",
        "properties": {
            "numbers": {"type": "integer"},
            "messages": {
                "type": "array",
                "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"},
            },
            "dict_of_lists": {
                "type": "object",
                "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
            },
        },
    }
    serialized_data = {
        "numbers": 1,
        "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
        "dict_of_lists": {"numbers": [1, 2, 3]},
    }

    restored = State.from_dict(
        {
            "schema": state_schema,
            "data": {"serialization_schema": serialization_schema, "serialized_data": serialized_data},
        }
    )

    # Type names are resolved back to the actual types
    assert restored.schema["numbers"]["type"] == int
    assert restored.schema["dict_of_lists"]["type"] == dict

    # Handlers only need to be callables; the exact function objects may be different references
    for field_name in ("numbers", "messages", "dict_of_lists"):
        assert callable(restored.schema[field_name]["handler"])

    # The data itself is restored
    assert restored.data["numbers"] == 1
    assert restored.data["messages"] == [ChatMessage.from_user(text="Hello, world!")]
    assert restored.data["dict_of_lists"] == {"numbers": [1, 2, 3]}
def test_state_from_dict_legacy(self):
# this is the old format of the state dictionary
# it is kept for backward compatibility
# it will be removed in Haystack 2.16.0
state_dict = {
"schema": {
"numbers": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"},

View File

@ -74,10 +74,233 @@ def test_deserialize_class_instance_invalid_data():
deserialize_class_instance({"type": "test_base_serialization.CustomClassNoFromDict", "data": {}})
def test_serialize_value_primitive_types():
    """Each primitive is serialized to a schema naming its type, with the value itself passed through."""
    # No local is named after a builtin here (the previous `bool = True` shadowed the `bool` type).
    number_value = 1
    string_value = "test"
    bool_value = True
    none_value = None

    result = _serialize_value_with_schema(number_value)
    assert result == {"serialization_schema": {"type": "integer"}, "serialized_data": 1}

    result = _serialize_value_with_schema(string_value)
    assert result == {"serialization_schema": {"type": "string"}, "serialized_data": "test"}

    result = _serialize_value_with_schema(bool_value)
    assert result == {"serialization_schema": {"type": "boolean"}, "serialized_data": True}

    result = _serialize_value_with_schema(none_value)
    assert result == {"serialization_schema": {"type": "null"}, "serialized_data": None}
def test_deserialize_value_primitive_types():
    """Primitive payloads are deserialized to the very same primitive value."""
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "integer"}, "serialized_data": 1})
    assert result == 1

    result = _deserialize_value_with_schema({"serialization_schema": {"type": "string"}, "serialized_data": "test"})
    assert result == "test"

    # Identity checks: `== True` would also accept 1, and `== None` relies on a custom __eq__ not lying
    result = _deserialize_value_with_schema({"serialization_schema": {"type": "boolean"}, "serialized_data": True})
    assert result is True

    result = _deserialize_value_with_schema({"serialization_schema": {"type": "null"}, "serialized_data": None})
    assert result is None
def test_serialize_value_with_sequences():
    """Lists, sets and tuples all serialize to an "array" schema; sets and tuples add distinguishing keywords."""
    as_list = _serialize_value_with_schema([1, 2, 3])
    assert as_list == {
        "serialization_schema": {"type": "array", "items": {"type": "integer"}},
        "serialized_data": [1, 2, 3],
    }

    # Sets are marked with "uniqueItems"
    as_set = _serialize_value_with_schema({1, 2, 3})
    assert as_set == {
        "serialization_schema": {"type": "array", "items": {"type": "integer"}, "uniqueItems": True},
        "serialized_data": [1, 2, 3],
    }

    # Tuples are marked with matching "minItems"/"maxItems"
    as_tuple = _serialize_value_with_schema((1, 2, 3))
    assert as_tuple == {
        "serialization_schema": {"type": "array", "items": {"type": "integer"}, "minItems": 3, "maxItems": 3},
        "serialized_data": [1, 2, 3],
    }
def test_deserialize_value_with_sequences():
    """"array" payloads come back as a list, a set ("uniqueItems") or a tuple ("minItems"/"maxItems")."""
    sequences = [1, 2, 3]
    set_sequences = {1, 2, 3}
    tuple_sequences = (1, 2, 3)

    result = _deserialize_value_with_schema(
        {"serialization_schema": {"type": "array", "items": {"type": "integer"}}, "serialized_data": [1, 2, 3]}
    )
    assert result == sequences

    result = _deserialize_value_with_schema(
        {
            "serialization_schema": {"type": "array", "items": {"type": "integer"}, "uniqueItems": True},
            "serialized_data": [1, 2, 3],
        }
    )
    assert result == set_sequences

    # The schema mirrors what the serializer emits for tuples: only "minItems"/"maxItems" mark a tuple.
    result = _deserialize_value_with_schema(
        {
            "serialization_schema": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 3,
                "maxItems": 3,
            },
            "serialized_data": [1, 2, 3],
        }
    )
    assert result == tuple_sequences
def test_serializing_and_deserializing_nested_lists():
    """A list of lists gets a nested "array" schema and survives the round trip."""
    original = [[1, 2], [3, 4]]
    expected_payload = {
        "serialization_schema": {"type": "array", "items": {"type": "array", "items": {"type": "integer"}}},
        "serialized_data": [[1, 2], [3, 4]],
    }

    payload = _serialize_value_with_schema(original)
    assert payload == expected_payload

    restored = _deserialize_value_with_schema(payload)
    assert restored == original
def test_serializing_and_deserializing_nested_answer_lists():
    """Test that _deserialize_value_with_schema handles nested lists"""
    # One row per inner list: (answer, query, document content, expected document id)
    cases = [
        (
            "Paris",
            "What is the capital of France?",
            "Paris is the capital of France",
            "413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651",
        ),
        (
            "Berlin",
            "What is the capital of Germany?",
            "Berlin is the capital of Germany",
            "c7b5b839963fcbf9b394b24c883731e840c3170ace33afb7af87a2de8a257f6f",
        ),
    ]

    nested_answers_list = [
        [GeneratedAnswer(data=answer, query=query, documents=[Document(content=content)], meta={"page": 1})]
        for answer, query, content, _ in cases
    ]

    expected_serialized_data = [
        [
            {
                "type": "haystack.dataclasses.answer.GeneratedAnswer",
                "init_parameters": {
                    "data": answer,
                    "query": query,
                    "documents": [
                        {
                            "id": document_id,
                            "content": content,
                            "blob": None,
                            "meta": {},
                            "score": None,
                            "embedding": None,
                            "sparse_embedding": None,
                        }
                    ],
                    "meta": {"page": 1},
                },
            }
        ]
        for answer, query, content, document_id in cases
    ]

    serialized_nested_answers_list = _serialize_value_with_schema(nested_answers_list)
    assert serialized_nested_answers_list == {
        "serialization_schema": {
            "type": "array",
            "items": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
        },
        "serialized_data": expected_serialized_data,
    }

    deserialized_nested_answers_list = _deserialize_value_with_schema(serialized_nested_answers_list)
    assert deserialized_nested_answers_list == nested_answers_list
def test_serializing_and_deserializing_nested_dicts():
    """Nested dicts get nested "object"/"properties" schemas and survive the round trip."""
    original = {"key1": {"nested1": "value1", "nested2": {"deep": "value2"}}}

    # Expected schema, built from the innermost dict outwards
    deepest_schema = {"type": "object", "properties": {"deep": {"type": "string"}}}
    key1_schema = {
        "type": "object",
        "properties": {"nested1": {"type": "string"}, "nested2": deepest_schema},
    }
    expected_payload = {
        "serialization_schema": {"type": "object", "properties": {"key1": key1_schema}},
        "serialized_data": {"key1": {"nested1": "value1", "nested2": {"deep": "value2"}}},
    }

    payload = _serialize_value_with_schema(original)
    assert payload == expected_payload

    restored = _deserialize_value_with_schema(payload)
    assert restored == original
def test_serializing_and_deserializing_nested_sets():
    """A list of sets keeps "uniqueItems" on the inner array schema and is restored as a list of sets."""
    original = [{1, 2}, {3, 4}]
    expected_payload = {
        "serialization_schema": {
            "items": {"items": {"type": "integer"}, "type": "array", "uniqueItems": True},
            "type": "array",
        },
        "serialized_data": [[1, 2], [3, 4]],
    }

    payload = _serialize_value_with_schema(original)
    assert payload == expected_payload

    # Deserialize from a literal payload rather than from the serializer output
    literal_payload = {
        "serialization_schema": {
            "items": {"items": {"type": "integer"}, "type": "array", "uniqueItems": True},
            "type": "array",
        },
        "serialized_data": [[1, 2], [3, 4]],
    }
    restored = _deserialize_value_with_schema(literal_payload)
    assert restored == original
def test_serializing_and_deserializing_empty_structures():
    """Test that _deserialize_value_with_schema handles empty structures"""
    original = {"empty_list": [], "empty_dict": {}, "nested_empty": {"empty": []}}

    round_tripped = _deserialize_value_with_schema(_serialize_value_with_schema(original))

    assert round_tripped == original
def test_serialize_value_with_schema():
data = {
"numbers": 1,
"messages": [ChatMessage.from_user(text="Hello, world!")],
"messages": [ChatMessage.from_user(text="Hello, world!"), ChatMessage.from_assistant(text="Hello, world!")],
"user_id": "123",
"dict_of_lists": {"numbers": [1, 2, 3]},
"documents": [Document(content="Hello, world!")],
@ -94,17 +317,32 @@ def test_serialize_value_with_schema():
result = _serialize_value_with_schema(data)
assert result == {
"serialization_schema": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"user_id": {"type": "string"},
"dict_of_lists": {"type": "object"},
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
"type": "object",
"properties": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"user_id": {"type": "string"},
"dict_of_lists": {
"type": "object",
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
},
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
"list_of_dicts": {
"type": "array",
"items": {
"type": "object",
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
},
},
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
},
},
"serialized_data": {
"numbers": 1,
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
"messages": [
{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
{"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
],
"user_id": "123",
"dict_of_lists": {"numbers": [1, 2, 3]},
"documents": [
@ -146,17 +384,26 @@ def test_serialize_value_with_schema():
def test_deserialize_value_with_schema():
serialized__data = {
"serialization_schema": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"user_id": {"type": "string"},
"dict_of_lists": {"type": "object"},
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
"type": "object",
"properties": {
"numbers": {"type": "integer"},
"messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}},
"user_id": {"type": "string"},
"dict_of_lists": {
"type": "object",
"properties": {"numbers": {"type": "array", "items": {"type": "integer"}}},
},
"documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}},
"list_of_dicts": {"type": "array", "items": {"type": "string"}},
"answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}},
},
},
"serialized_data": {
"numbers": 1,
"messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}],
"messages": [
{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
{"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]},
],
"user_id": "123",
"dict_of_lists": {"numbers": [1, 2, 3]},
"documents": [
@ -205,27 +452,21 @@ def test_deserialize_value_with_schema():
assert isinstance(result["answers"][0], GeneratedAnswer)
def test_serialize_value_with_custom_class_type():
def test_serializing_and_deserializing_custom_class_type():
custom_type = CustomClass()
data = {"numbers": 1, "custom_type": custom_type}
result = _serialize_value_with_schema(data)
assert result == {
serialized_data = _serialize_value_with_schema(data)
assert serialized_data == {
"serialization_schema": {
"numbers": {"type": "integer"},
"custom_type": {"type": "test_base_serialization.CustomClass"},
"properties": {
"custom_type": {"type": "test_base_serialization.CustomClass"},
"numbers": {"type": "integer"},
},
"type": "object",
},
"serialized_data": {"numbers": 1, "custom_type": {"key": "value", "more": False}},
}
def test_deserialize_value_with_custom_class_type():
serialized_data = {
"serialization_schema": {
"numbers": {"type": "integer"},
"custom_type": {"type": "test_base_serialization.CustomClass"},
},
"serialized_data": {"numbers": 1, "custom_type": {"key": "value", "more": False}},
}
result = _deserialize_value_with_schema(serialized_data)
assert result["numbers"] == 1
assert isinstance(result["custom_type"], CustomClass)
deserialized_data = _deserialize_value_with_schema(serialized_data)
assert deserialized_data["numbers"] == 1
assert isinstance(deserialized_data["custom_type"], CustomClass)