Fix broken MultiLabel serialization (#3037)

* Fix MultiLabel serialization
* update docs
* better comment
* remove unused imports
* remove unused imports (2)
2026-01-06 03:57:19 +00:00 · 2022-08-15 13:09:18 +02:00 · 2022-08-15 13:09:18 +02:00 · 0aa0c68785
commit 0aa0c68785
parent ff38a20863
2 changed files with 14 additions and 7 deletions
--- a/docs/_src/api/api/primitives.md
+++ b/docs/_src/api/api/primitives.md
@@ -272,7 +272,7 @@ class MultiLabel()
 #### MultiLabel.\_\_init\_\_

 ```python
-def __init__(labels: List[Label], drop_negative_labels=False, drop_no_answers=False)
+def __init__(labels: List[Label], drop_negative_labels=False, drop_no_answers=False, **kwargs)
 ```

 There are often multiple `Labels` associated with a single query. For example, there can be multiple annotated
@@ -288,6 +288,7 @@ underlying Labels provided a text answer and therefore demonstrates that there i
 - `labels`: A list of labels that belong to a similar query and shall be "grouped" together
 - `drop_negative_labels`: Whether to drop negative labels from that group (e.g. thumbs down feedback from UI)
 - `drop_no_answers`: Whether to drop labels that specify the answer is impossible
+- `kwargs`: All additional attributes are ignored. This is just a workaround to enable smooth `to_dict()`-`from_dict()`-(de)serialization.

 <a id="schema.EvaluationResult"></a>

--- a/haystack/schema.py
+++ b/haystack/schema.py
@@ -509,16 +509,21 @@ class Label:

        self.updated_at = updated_at
        self.query = query
+
+        # TODO: fix MultiLabel serialization without hacking Label
+        # As this is called during pydantic validation when MultiLabel is being serialized,
+        # answer might still be a dict breaking the following no_answer validation code.
+        if isinstance(answer, dict):
+            answer = Answer.from_dict(answer)
        self.answer = answer
+        if isinstance(document, dict):
+            document = Document.from_dict(document)
        self.document = document
+
        self.is_correct_answer = is_correct_answer
        self.is_correct_document = is_correct_document
        self.origin = origin

-        # Remove
-        # self.document_id = document_id
-        # self.offset_start_in_doc = offset_start_in_doc
-
        # If an Answer is provided we need to make sure that it's consistent with the `no_answer` value
        # TODO: reassess if we want to enforce Span.start=0 and Span.end=0 for no_answer=True
        if self.answer is not None:
@@ -611,7 +616,7 @@ class MultiLabel:
    offsets_in_contexts: List[Dict]
    offsets_in_documents: List[Dict]

-    def __init__(self, labels: List[Label], drop_negative_labels=False, drop_no_answers=False):
+    def __init__(self, labels: List[Label], drop_negative_labels=False, drop_no_answers=False, **kwargs):
        """
        There are often multiple `Labels` associated with a single query. For example, there can be multiple annotated
        answers for one question or multiple documents contain the information you want for a query.
@@ -623,6 +628,7 @@ class MultiLabel:
        :param labels: A list of labels that belong to a similar query and shall be "grouped" together
        :param drop_negative_labels: Whether to drop negative labels from that group (e.g. thumbs down feedback from UI)
        :param drop_no_answers: Whether to drop labels that specify the answer is impossible
+        :param kwargs: All additional attributes are ignored. This is just a workaround to enable smooth `to_dict()`-`from_dict()`-(de)serialization.
        """
        # drop duplicate labels and remove negative labels if needed.
        labels = list(dict.fromkeys(labels))
@@ -714,7 +720,7 @@ class MultiLabel:
 def _pydantic_dataclass_from_dict(dict: dict, pydantic_dataclass_type) -> Any:
    """
    Constructs a pydantic dataclass from a dict incl. other nested dataclasses.
-    This allows simple de-serialization of pydentic dataclasses from json.
+    This allows simple de-serialization of pydantic dataclasses from json.
    :param dict: Dict containing all attributes and values for the dataclass.
    :param pydantic_dataclass_type: The class of the dataclass that should be constructed (e.g. Document)
    """