From 9ed0b9b0bc2378e044e4f06f7b7e66a6192fdfaa Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Tue, 24 Jun 2025 13:10:12 +0200 Subject: [PATCH] fix: Update the de/serialization with schema utils (#9526) * Update the util methods * Update tests * fix tests * schema fix * Add json schema for tuples and sets * Add proper conversion for sets and tuples * Adjust typing * PR comments * Linting * Optimize deserialization * remove TODO * PR comments * PR comments * Update tests and deserialization error * Support legacy deserialization * Update deprecating warning * Update test --- haystack/utils/base_serialization.py | 280 +++++++++++++------ test/components/agents/test_state_class.py | 63 ++++- test/utils/test_base_serialization.py | 309 ++++++++++++++++++--- 3 files changed, 524 insertions(+), 128 deletions(-) diff --git a/haystack/utils/base_serialization.py b/haystack/utils/base_serialization.py index 225aa6e20..71f34c2a7 100644 --- a/haystack/utils/base_serialization.py +++ b/haystack/utils/base_serialization.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings from typing import Any, Dict from haystack.core.errors import DeserializationError, SerializationError @@ -54,10 +55,9 @@ def deserialize_class_instance(data: Dict[str, Any]) -> Any: return obj_class.from_dict(data["data"]) -# TODO: Make this function public once its implementation is finalized and tested -def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]: +def _serialize_value_with_schema(payload: Any) -> Dict[str, Any]: """ - Serializes a dictionary into a schema-aware format suitable for storage or transmission. + Serializes a value into a schema-aware format suitable for storage or transmission. The output format separates the schema information from the actual data, making it easier to deserialize complex nested structures correctly. 
@@ -66,63 +66,69 @@ def _serialize_value_with_schema(payload: Dict[str, Any]) -> Dict[str, Any]: - Objects with to_dict() methods (e.g. dataclasses) - Objects with __dict__ attributes - Dictionaries - - Lists, tuples, and sets + - Lists, tuples, and sets. Lists with mixed types are not supported. - Primitive types (str, int, float, bool, None) - :param value: The value to serialize + :param payload: The value to serialize (can be any type) :returns: The serialized dict representation of the given value. Contains two keys: - - "schema": Contains type information for each field - - "data": Contains the actual data in a simplified format + - "serialization_schema": Contains type information for each field. + - "serialized_data": Contains the actual data in a simplified format. """ - schema: Dict[str, Any] = {} - data: Dict[str, Any] = {} + # Handle dictionary case - iterate through fields + if isinstance(payload, dict): + schema: Dict[str, Any] = {} + data: Dict[str, Any] = {} - for field, val in payload.items(): - # 1) Handle dataclass‐style objects - if hasattr(val, "to_dict") and callable(val.to_dict): - type_name = generate_qualified_class_name(type(val)) - pure = _convert_to_basic_types(val.to_dict()) - schema[field] = {"type": type_name} - data[field] = pure + for field, val in payload.items(): + # Recursively serialize each field + serialized_value = _serialize_value_with_schema(val) + schema[field] = serialized_value["serialization_schema"] + data[field] = serialized_value["serialized_data"] - # 2) Arbitrary objects w/ __dict__ - elif hasattr(val, "__dict__"): - type_name = generate_qualified_class_name(type(val)) - pure = _convert_to_basic_types(vars(val)) - schema[field] = {"type": type_name} - data[field] = pure + return {"serialization_schema": {"type": "object", "properties": schema}, "serialized_data": data} - # 3) Dicts → "object" - elif isinstance(val, dict): - pure = _convert_to_basic_types(val) - schema[field] = {"type": "object"} - data[field] = 
pure + # Handle array case - iterate through elements + elif isinstance(payload, (list, tuple, set)): + # Convert to list for consistent handling + pure_list = _convert_to_basic_types(list(payload)) - # 4) Sequences → "array" - elif isinstance(val, (list, tuple, set)): - # pure data - pure_list = _convert_to_basic_types(list(val)) - # determine item type from first element (if any) - if val: - first = next(iter(val)) - if hasattr(first, "to_dict") and callable(first.to_dict) or hasattr(first, "__dict__"): - item_type = generate_qualified_class_name(type(first)) - else: - item_type = _primitive_schema_type(first) - else: - item_type = "any" - - schema[field] = {"type": "array", "items": {"type": item_type}} - data[field] = pure_list - - # 5) Primitives + # Determine item type from first element (if any) + if payload: + first = next(iter(payload)) + item_schema = _serialize_value_with_schema(first) + base_schema = {"type": "array", "items": item_schema["serialization_schema"]} else: - prim_type = _primitive_schema_type(val) - schema[field] = {"type": prim_type} - data[field] = val + base_schema = {"type": "array", "items": {}} - return {"serialization_schema": schema, "serialized_data": data} + # Add JSON Schema properties to infer sets and tuples + if isinstance(payload, set): + base_schema["uniqueItems"] = True + elif isinstance(payload, tuple): + base_schema["minItems"] = len(payload) + base_schema["maxItems"] = len(payload) + + return {"serialization_schema": base_schema, "serialized_data": pure_list} + + # Handle Haystack style objects (e.g. 
dataclasses and Components) + elif hasattr(payload, "to_dict") and callable(payload.to_dict): + type_name = generate_qualified_class_name(type(payload)) + pure = _convert_to_basic_types(payload) + schema = {"type": type_name} + return {"serialization_schema": schema, "serialized_data": pure} + + # Handle arbitrary objects with __dict__ + elif hasattr(payload, "__dict__"): + type_name = generate_qualified_class_name(type(payload)) + pure = _convert_to_basic_types(vars(payload)) + schema = {"type": type_name} + return {"serialization_schema": schema, "serialized_data": pure} + + # Handle primitives + else: + prim_type = _primitive_schema_type(payload) + schema = {"type": prim_type} + return {"serialization_schema": schema, "serialized_data": payload} def _primitive_schema_type(value: Any) -> str: @@ -172,69 +178,103 @@ def _convert_to_basic_types(value: Any) -> Any: # sequences if isinstance(value, (list, tuple, set)): - cls = type(value) - return cls(_convert_to_basic_types(v) for v in value) + return [_convert_to_basic_types(v) for v in value] # primitive return value -# TODO: Make this function public once its implementation is finalized and tested -def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Dict[str, Any]: +def _deserialize_value_with_schema(serialized: Dict[str, Any]) -> Any: # pylint: disable=too-many-return-statements, # noqa: PLR0911, PLR0912 """ - Deserializes a dictionary with schema information and data to original values. + Deserializes a value with schema information back to its original form. 
Takes a dict of the form: { - "schema": { - "numbers": {"type": "integer"}, - "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, - }, - "data": { - "numbers": 1, - "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}], + "serialization_schema": {"type": "integer"} or {"type": "object", "properties": {...}}, + "serialized_data": (the actual data) } :param serialized: The serialized dict with schema and data. - :returns: The deserialized dict with original values. + :returns: The deserialized value in its original form. """ - schema = serialized.get("serialization_schema", {}) - data = serialized.get("serialized_data", {}) - result: Dict[str, Any] = {} - for field, raw in data.items(): - info = schema.get(field) - # no schema entry → just deep-deserialize whatever we have - if not info: - result[field] = _deserialize_value(raw) - continue + if not serialized or "serialization_schema" not in serialized or "serialized_data" not in serialized: + raise DeserializationError( + f"Invalid format of passed serialized payload. Expected a dictionary with keys " + f"'serialization_schema' and 'serialized_data'. Got: {serialized}" + ) + schema = serialized["serialization_schema"] + data = serialized["serialized_data"] - t = info["type"] + schema_type = schema.get("type") - # ARRAY case - if t == "array": - item_type = info["items"]["type"] - reconstructed = [] - for item in raw: - envelope = {"type": item_type, "data": item} - reconstructed.append(_deserialize_value(envelope)) - result[field] = reconstructed + if not schema_type: + # for backward compatibility till Haystack 2.16 we use legacy implementation + warnings.warn( + "Missing 'type' key in 'serialization_schema'. This likely indicates that you're using a serialized " + "State object created with a version of Haystack older than 2.15.0. " + "Support for the old serialization format will be removed in Haystack 2.16.0. 
" + "Please upgrade to the new serialization format to ensure forward compatibility.", + DeprecationWarning, + ) + return _deserialize_value_with_schema_legacy(serialized) - # PRIMITIVE case - elif t in ("null", "boolean", "integer", "number", "string"): - result[field] = raw + # Handle object case (dictionary with properties) + if schema_type == "object": + properties = schema.get("properties") + if properties: + result: Dict[str, Any] = {} - # GENERIC OBJECT - elif t == "object": - envelope = {"type": "object", "data": raw} - result[field] = _deserialize_value(envelope) + if isinstance(data, dict): + for field, raw_value in data.items(): + field_schema = properties.get(field) + if field_schema: + # Recursively deserialize each field - avoid creating temporary dict + result[field] = _deserialize_value_with_schema( + {"serialization_schema": field_schema, "serialized_data": raw_value} + ) - # CUSTOM CLASS + return result else: - envelope = {"type": t, "data": raw} - result[field] = _deserialize_value(envelope) + return _deserialize_value(data) - return result + # Handle array case + elif schema_type == "array": + # Cache frequently accessed schema properties + item_schema = schema.get("items", {}) + item_type = item_schema.get("type", "any") + is_set = schema.get("uniqueItems") is True + is_tuple = schema.get("minItems") is not None and schema.get("maxItems") is not None + + # Handle nested objects/arrays first (most complex case) + if item_type in ("object", "array"): + return [ + _deserialize_value_with_schema({"serialization_schema": item_schema, "serialized_data": item}) + for item in data + ] + + # Helper function to deserialize individual items + def deserialize_item(item): + if item_type == "any": + return _deserialize_value(item) + else: + return _deserialize_value({"type": item_type, "data": item}) + + # Handle different collection types + if is_set: + return {deserialize_item(item) for item in data} + elif is_tuple: + return tuple(deserialize_item(item) 
for item in data) + else: + return [deserialize_item(item) for item in data] + + # Handle primitive types + elif schema_type in ("null", "boolean", "integer", "number", "string"): + return data + + # Handle custom class types + else: + return _deserialize_value({"type": schema_type, "data": data}) def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-statements # noqa: PLR0911 @@ -291,3 +331,61 @@ def _deserialize_value(value: Any) -> Any: # pylint: disable=too-many-return-st # 4) Fallback (shouldn't usually happen with our schema) return value + + +def _deserialize_value_with_schema_legacy(serialized: Dict[str, Any]) -> Dict[str, Any]: + """ + Legacy function for deserializing a dictionary with schema information and data to original values. + + Kept for backward compatibility till Haystack 2.16.0. + Takes a dict of the form: + { + "schema": { + "numbers": {"type": "integer"}, + "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, + }, + "data": { + "numbers": 1, + "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}], + } + + :param serialized: The serialized dict with schema and data. + :returns: The deserialized dict with original values. 
+ """ + schema = serialized.get("serialization_schema", {}) + data = serialized.get("serialized_data", {}) + + result: Dict[str, Any] = {} + for field, raw in data.items(): + info = schema.get(field) + # no schema entry → just deep-deserialize whatever we have + if not info: + result[field] = _deserialize_value(raw) + continue + + t = info["type"] + + # ARRAY case + if t == "array": + item_type = info["items"]["type"] + reconstructed = [] + for item in raw: + envelope = {"type": item_type, "data": item} + reconstructed.append(_deserialize_value(envelope)) + result[field] = reconstructed + + # PRIMITIVE case + elif t in ("null", "boolean", "integer", "number", "string"): + result[field] = raw + + # GENERIC OBJECT + elif t == "object": + envelope = {"type": "object", "data": raw} + result[field] = _deserialize_value(envelope) + + # CUSTOM CLASS + else: + envelope = {"type": t, "data": raw} + result[field] = _deserialize_value(envelope) + + return result diff --git a/test/components/agents/test_state_class.py b/test/components/agents/test_state_class.py index 43d925dea..73acc6ac1 100644 --- a/test/components/agents/test_state_class.py +++ b/test/components/agents/test_state_class.py @@ -368,9 +368,15 @@ class TestState: } assert state_dict["data"] == { "serialization_schema": { - "numbers": {"type": "integer"}, - "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, - "dict_of_lists": {"type": "object"}, + "type": "object", + "properties": { + "numbers": {"type": "integer"}, + "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, + "dict_of_lists": { + "type": "object", + "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}}, + }, + }, }, "serialized_data": { "numbers": 1, @@ -380,6 +386,57 @@ class TestState: } def test_state_from_dict(self): + state_dict = { + "schema": { + "numbers": {"type": "int", "handler": 
"haystack.components.agents.state.state_utils.replace_values"}, + "messages": { + "type": "typing.List[haystack.dataclasses.chat_message.ChatMessage]", + "handler": "haystack.components.agents.state.state_utils.merge_lists", + }, + "dict_of_lists": { + "type": "dict", + "handler": "haystack.components.agents.state.state_utils.replace_values", + }, + }, + "data": { + "serialization_schema": { + "type": "object", + "properties": { + "numbers": {"type": "integer"}, + "messages": { + "type": "array", + "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}, + }, + "dict_of_lists": { + "type": "object", + "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}}, + }, + }, + }, + "serialized_data": { + "numbers": 1, + "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}], + "dict_of_lists": {"numbers": [1, 2, 3]}, + }, + }, + } + state = State.from_dict(state_dict) + # Check types are correctly converted + assert state.schema["numbers"]["type"] == int + assert state.schema["dict_of_lists"]["type"] == dict + # Check handlers are functions, not comparing exact functions as they might be different references + assert callable(state.schema["numbers"]["handler"]) + assert callable(state.schema["messages"]["handler"]) + assert callable(state.schema["dict_of_lists"]["handler"]) + # Check data is correct + assert state.data["numbers"] == 1 + assert state.data["messages"] == [ChatMessage.from_user(text="Hello, world!")] + assert state.data["dict_of_lists"] == {"numbers": [1, 2, 3]} + + def test_state_from_dict_legacy(self): + # this is the old format of the state dictionary + # it is kept for backward compatibility + # it will be removed in Haystack 2.16.0 state_dict = { "schema": { "numbers": {"type": "int", "handler": "haystack.components.agents.state.state_utils.replace_values"}, diff --git a/test/utils/test_base_serialization.py b/test/utils/test_base_serialization.py index a4c7049ce..7bd54a84f 100644 
--- a/test/utils/test_base_serialization.py +++ b/test/utils/test_base_serialization.py @@ -74,10 +74,233 @@ def test_deserialize_class_instance_invalid_data(): deserialize_class_instance({"type": "test_base_serialization.CustomClassNoFromDict", "data": {}}) +def test_serialize_value_primitive_types(): + numbers = 1 + string = "test" + bool = True + none = None + result = _serialize_value_with_schema(numbers) + assert result == {"serialization_schema": {"type": "integer"}, "serialized_data": 1} + result = _serialize_value_with_schema(string) + assert result == {"serialization_schema": {"type": "string"}, "serialized_data": "test"} + result = _serialize_value_with_schema(bool) + assert result == {"serialization_schema": {"type": "boolean"}, "serialized_data": True} + result = _serialize_value_with_schema(none) + assert result == {"serialization_schema": {"type": "null"}, "serialized_data": None} + + +def test_deserialize_value_primitive_types(): + result = _deserialize_value_with_schema({"serialization_schema": {"type": "integer"}, "serialized_data": 1}) + assert result == 1 + result = _deserialize_value_with_schema({"serialization_schema": {"type": "string"}, "serialized_data": "test"}) + assert result == "test" + result = _deserialize_value_with_schema({"serialization_schema": {"type": "boolean"}, "serialized_data": True}) + assert result == True + result = _deserialize_value_with_schema({"serialization_schema": {"type": "null"}, "serialized_data": None}) + assert result == None + + +def test_serialize_value_with_sequences(): + sequences = [1, 2, 3] + set_sequences = {1, 2, 3} + tuple_sequences = (1, 2, 3) + result = _serialize_value_with_schema(sequences) + assert result == { + "serialization_schema": {"type": "array", "items": {"type": "integer"}}, + "serialized_data": [1, 2, 3], + } + result = _serialize_value_with_schema(set_sequences) + assert result == { + "serialization_schema": {"type": "array", "items": {"type": "integer"}, "uniqueItems": True}, + 
"serialized_data": [1, 2, 3], + } + result = _serialize_value_with_schema(tuple_sequences) + assert result == { + "serialization_schema": {"type": "array", "items": {"type": "integer"}, "minItems": 3, "maxItems": 3}, + "serialized_data": [1, 2, 3], + } + + +def test_deserialize_value_with_sequences(): + sequences = [1, 2, 3] + set_sequences = {1, 2, 3} + tuple_sequences = (1, 2, 3) + result = _deserialize_value_with_schema( + {"serialization_schema": {"type": "array", "items": {"type": "integer"}}, "serialized_data": [1, 2, 3]} + ) + assert result == sequences + result = _deserialize_value_with_schema( + { + "serialization_schema": {"type": "array", "items": {"type": "integer"}, "uniqueItems": True}, + "serialized_data": [1, 2, 3], + } + ) + assert result == set_sequences + result = _deserialize_value_with_schema( + { + "serialization_schema": { + "type": "array", + "items": {"type": "integer"}, + "collection_type": "tuple", + "minItems": 3, + "maxItems": 3, + }, + "serialized_data": [1, 2, 3], + } + ) + assert result == tuple_sequences + + +def test_serializing_and_deserializing_nested_lists(): + nested_lists = [[1, 2], [3, 4]] + + serialized_nested_lists = _serialize_value_with_schema(nested_lists) + assert serialized_nested_lists == { + "serialization_schema": {"type": "array", "items": {"type": "array", "items": {"type": "integer"}}}, + "serialized_data": [[1, 2], [3, 4]], + } + + deserialized_nested_lists = _deserialize_value_with_schema(serialized_nested_lists) + assert deserialized_nested_lists == nested_lists + + +def test_serializing_and_deserializing_nested_answer_lists(): + """Test that _deserialize_value_with_schema handles nested lists""" + + nested_answers_list = [ + [ + GeneratedAnswer( + data="Paris", + query="What is the capital of France?", + documents=[Document(content="Paris is the capital of France")], + meta={"page": 1}, + ) + ], + [ + GeneratedAnswer( + data="Berlin", + query="What is the capital of Germany?", + 
documents=[Document(content="Berlin is the capital of Germany")], + meta={"page": 1}, + ) + ], + ] + serialized_nested_answers_list = _serialize_value_with_schema(nested_answers_list) + assert serialized_nested_answers_list == { + "serialization_schema": { + "type": "array", + "items": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}}, + }, + "serialized_data": [ + [ + { + "type": "haystack.dataclasses.answer.GeneratedAnswer", + "init_parameters": { + "data": "Paris", + "query": "What is the capital of France?", + "documents": [ + { + "id": "413dccdf51a54cca75b7ed2eddac04e6e58560bd2f0caf4106a3efc023fe3651", + "content": "Paris is the capital of France", + "blob": None, + "meta": {}, + "score": None, + "embedding": None, + "sparse_embedding": None, + } + ], + "meta": {"page": 1}, + }, + } + ], + [ + { + "type": "haystack.dataclasses.answer.GeneratedAnswer", + "init_parameters": { + "data": "Berlin", + "query": "What is the capital of Germany?", + "documents": [ + { + "id": "c7b5b839963fcbf9b394b24c883731e840c3170ace33afb7af87a2de8a257f6f", + "content": "Berlin is the capital of Germany", + "blob": None, + "meta": {}, + "score": None, + "embedding": None, + "sparse_embedding": None, + } + ], + "meta": {"page": 1}, + }, + } + ], + ], + } + + deserialized_nested_answers_list = _deserialize_value_with_schema(serialized_nested_answers_list) + assert deserialized_nested_answers_list == nested_answers_list + + +def test_serializing_and_deserializing_nested_dicts(): + data = {"key1": {"nested1": "value1", "nested2": {"deep": "value2"}}} + serialized_nested_dicts = _serialize_value_with_schema(data) + assert serialized_nested_dicts == { + "serialization_schema": { + "type": "object", + "properties": { + "key1": { + "type": "object", + "properties": { + "nested1": {"type": "string"}, + "nested2": {"type": "object", "properties": {"deep": {"type": "string"}}}, + }, + } + }, + }, + "serialized_data": {"key1": {"nested1": "value1", "nested2": 
{"deep": "value2"}}}, + } + + deserialized_nested_dicts = _deserialize_value_with_schema(serialized_nested_dicts) + assert deserialized_nested_dicts == data + + +def test_serializing_and_deserializing_nested_sets(): + nested_sets = [{1, 2}, {3, 4}] + + result = _serialize_value_with_schema(nested_sets) + assert result == { + "serialization_schema": { + "items": {"items": {"type": "integer"}, "type": "array", "uniqueItems": True}, + "type": "array", + }, + "serialized_data": [[1, 2], [3, 4]], + } + + result = _deserialize_value_with_schema( + { + "serialization_schema": { + "items": {"items": {"type": "integer"}, "type": "array", "uniqueItems": True}, + "type": "array", + }, + "serialized_data": [[1, 2], [3, 4]], + } + ) + assert result == nested_sets + + +def test_serializing_and_deserializing_empty_structures(): + """Test that _deserialize_value_with_schema handles empty structures""" + data = {"empty_list": [], "empty_dict": {}, "nested_empty": {"empty": []}} + serialized_data = _serialize_value_with_schema(data) + result = _deserialize_value_with_schema(serialized_data) + + assert result == data + + def test_serialize_value_with_schema(): data = { "numbers": 1, - "messages": [ChatMessage.from_user(text="Hello, world!")], + "messages": [ChatMessage.from_user(text="Hello, world!"), ChatMessage.from_assistant(text="Hello, world!")], "user_id": "123", "dict_of_lists": {"numbers": [1, 2, 3]}, "documents": [Document(content="Hello, world!")], @@ -94,17 +317,32 @@ def test_serialize_value_with_schema(): result = _serialize_value_with_schema(data) assert result == { "serialization_schema": { - "numbers": {"type": "integer"}, - "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, - "user_id": {"type": "string"}, - "dict_of_lists": {"type": "object"}, - "documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}}, - "list_of_dicts": {"type": "array", "items": {"type": "string"}}, - "answers": 
{"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}}, + "type": "object", + "properties": { + "numbers": {"type": "integer"}, + "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, + "user_id": {"type": "string"}, + "dict_of_lists": { + "type": "object", + "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}}, + }, + "documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}}, + "list_of_dicts": { + "type": "array", + "items": { + "type": "object", + "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}}, + }, + }, + "answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}}, + }, }, "serialized_data": { "numbers": 1, - "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}], + "messages": [ + {"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}, + {"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}, + ], "user_id": "123", "dict_of_lists": {"numbers": [1, 2, 3]}, "documents": [ @@ -146,17 +384,26 @@ def test_serialize_value_with_schema(): def test_deserialize_value_with_schema(): serialized__data = { "serialization_schema": { - "numbers": {"type": "integer"}, - "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, - "user_id": {"type": "string"}, - "dict_of_lists": {"type": "object"}, - "documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}}, - "list_of_dicts": {"type": "array", "items": {"type": "string"}}, - "answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}}, + "type": "object", + "properties": { + "numbers": {"type": "integer"}, + "messages": {"type": "array", "items": {"type": "haystack.dataclasses.chat_message.ChatMessage"}}, + "user_id": {"type": 
"string"}, + "dict_of_lists": { + "type": "object", + "properties": {"numbers": {"type": "array", "items": {"type": "integer"}}}, + }, + "documents": {"type": "array", "items": {"type": "haystack.dataclasses.document.Document"}}, + "list_of_dicts": {"type": "array", "items": {"type": "string"}}, + "answers": {"type": "array", "items": {"type": "haystack.dataclasses.answer.GeneratedAnswer"}}, + }, }, "serialized_data": { "numbers": 1, - "messages": [{"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}], + "messages": [ + {"role": "user", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}, + {"role": "assistant", "meta": {}, "name": None, "content": [{"text": "Hello, world!"}]}, + ], "user_id": "123", "dict_of_lists": {"numbers": [1, 2, 3]}, "documents": [ @@ -205,27 +452,21 @@ def test_deserialize_value_with_schema(): assert isinstance(result["answers"][0], GeneratedAnswer) -def test_serialize_value_with_custom_class_type(): +def test_serializing_and_deserializing_custom_class_type(): custom_type = CustomClass() data = {"numbers": 1, "custom_type": custom_type} - result = _serialize_value_with_schema(data) - assert result == { + serialized_data = _serialize_value_with_schema(data) + assert serialized_data == { "serialization_schema": { - "numbers": {"type": "integer"}, - "custom_type": {"type": "test_base_serialization.CustomClass"}, + "properties": { + "custom_type": {"type": "test_base_serialization.CustomClass"}, + "numbers": {"type": "integer"}, + }, + "type": "object", }, "serialized_data": {"numbers": 1, "custom_type": {"key": "value", "more": False}}, } - -def test_deserialize_value_with_custom_class_type(): - serialized_data = { - "serialization_schema": { - "numbers": {"type": "integer"}, - "custom_type": {"type": "test_base_serialization.CustomClass"}, - }, - "serialized_data": {"numbers": 1, "custom_type": {"key": "value", "more": False}}, - } - result = _deserialize_value_with_schema(serialized_data) - assert 
result["numbers"] == 1 - assert isinstance(result["custom_type"], CustomClass) + deserialized_data = _deserialize_value_with_schema(serialized_data) + assert deserialized_data["numbers"] == 1 + assert isinstance(deserialized_data["custom_type"], CustomClass)