diff --git a/docs/_src/api/api/primitives.md b/docs/_src/api/api/primitives.md index 194bf8e7b..8be8ee27e 100644 --- a/docs/_src/api/api/primitives.md +++ b/docs/_src/api/api/primitives.md @@ -272,7 +272,7 @@ class MultiLabel() #### MultiLabel.\_\_init\_\_ ```python -def __init__(labels: List[Label], drop_negative_labels=False, drop_no_answers=False) +def __init__(labels: List[Label], drop_negative_labels=False, drop_no_answers=False, **kwargs) ``` There are often multiple `Labels` associated with a single query. For example, there can be multiple annotated @@ -288,6 +288,7 @@ underlying Labels provided a text answer and therefore demonstrates that there i - `labels`: A list of labels that belong to a similar query and shall be "grouped" together - `drop_negative_labels`: Whether to drop negative labels from that group (e.g. thumbs down feedback from UI) - `drop_no_answers`: Whether to drop labels that specify the answer is impossible +- `kwargs`: All additional attributes are ignored. This is just a workaround to enable smooth `to_dict()`-`from_dict()`-(de)serialization. diff --git a/haystack/schema.py b/haystack/schema.py index 7078afe08..55fce78a0 100644 --- a/haystack/schema.py +++ b/haystack/schema.py @@ -509,16 +509,21 @@ class Label: self.updated_at = updated_at self.query = query + + # TODO: fix MultiLabel serialization without hacking Label + # As this is called during pydantic validation when MultiLabel is being serialized, + # answer might still be a dict breaking the following no_answer validation code. + if isinstance(answer, dict): + answer = Answer.from_dict(answer) self.answer = answer + if isinstance(document, dict): + document = Document.from_dict(document) self.document = document + self.is_correct_answer = is_correct_answer self.is_correct_document = is_correct_document self.origin = origin - # Remove - # self.document_id = document_id - # self.offset_start_in_doc = offset_start_in_doc - # If an Answer is provided we need to make sure that it's consistent with the `no_answer` value # TODO: reassess if we want to enforce Span.start=0 and Span.end=0 for no_answer=True if self.answer is not None: @@ -611,7 +616,7 @@ class MultiLabel: offsets_in_contexts: List[Dict] offsets_in_documents: List[Dict] - def __init__(self, labels: List[Label], drop_negative_labels=False, drop_no_answers=False): + def __init__(self, labels: List[Label], drop_negative_labels=False, drop_no_answers=False, **kwargs): """ There are often multiple `Labels` associated with a single query. For example, there can be multiple annotated answers for one question or multiple documents contain the information you want for a query. @@ -623,6 +628,7 @@ class MultiLabel: :param labels: A list of labels that belong to a similar query and shall be "grouped" together :param drop_negative_labels: Whether to drop negative labels from that group (e.g. thumbs down feedback from UI) :param drop_no_answers: Whether to drop labels that specify the answer is impossible + :param kwargs: All additional attributes are ignored. This is just a workaround to enable smooth `to_dict()`-`from_dict()`-(de)serialization. """ # drop duplicate labels and remove negative labels if needed. labels = list(dict.fromkeys(labels)) @@ -714,7 +720,7 @@ class MultiLabel: def _pydantic_dataclass_from_dict(dict: dict, pydantic_dataclass_type) -> Any: """ Constructs a pydantic dataclass from a dict incl. other nested dataclasses. - This allows simple de-serialization of pydentic dataclasses from json. + This allows simple de-serialization of pydantic dataclasses from json. :param dict: Dict containing all attributes and values for the dataclass. :param pydantic_dataclass_type: The class of the dataclass that should be constructed (e.g. Document) """