refactor: Change no_answer attribute (#3411)

* always run validation * update schemas * no_answer as a property. break things! * forgotten schema * fix * update openapi * removed my unnecessary test * fix sql document store Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
2025-10-04 04:26:46 +00:00 · 2022-10-25 13:07:00 +02:00 · 2022-10-25 13:07:00 +02:00 · 54ec13eaf7
commit 54ec13eaf7
parent 6a422d588f
9 changed files with 7 additions and 69 deletions
--- a/docs/_src/api/openapi/openapi-1.11.0rc0.json
+++ b/docs/_src/api/openapi/openapi-1.11.0rc0.json
@ -876,10 +876,6 @@
                    "answer": {
                        "$ref": "#/components/schemas/Answer"
                    },
                    "no_answer": {
                        "title": "No Answer",
                        "type": "boolean"
                    },
                    "pipeline_id": {
                        "title": "Pipeline Id",
                        "type": "string"
--- a/docs/_src/api/openapi/openapi.json
+++ b/docs/_src/api/openapi/openapi.json
@ -876,10 +876,6 @@
                    "answer": {
                        "$ref": "#/components/schemas/Answer"
                    },
                    "no_answer": {
                        "title": "No Answer",
                        "type": "boolean"
                    },
                    "pipeline_id": {
                        "title": "Pipeline Id",
                        "type": "string"
--- a/haystack/document_stores/pinecone.py
+++ b/haystack/document_stores/pinecone.py
@ -1416,7 +1416,6 @@ class PineconeDocumentStore(BaseDocumentStore):
                query=label_meta["query"],
                document=doc,
                answer=answer,
                no_answer=label_meta["label-no-answer"],
                pipeline_id=label_meta["label-pipeline-id"],
                created_at=label_meta["label-created-at"],
                updated_at=label_meta["label-updated-at"],
--- a/haystack/document_stores/sql.py
+++ b/haystack/document_stores/sql.py
@ -586,7 +586,6 @@ class SQLDocumentStore(BaseDocumentStore):
            is_correct_document=row.is_correct_document,
            origin=row.origin,
            id=row.id,
            no_answer=row.no_answer,
            pipeline_id=row.pipeline_id,
            created_at=str(row.created_at),
            updated_at=str(row.updated_at),
--- a/haystack/document_stores/utils.py
+++ b/haystack/document_stores/utils.py
@ -187,7 +187,6 @@ def _extract_docs_and_labels_from_dict(
                            document=None,  # type: ignore
                            is_correct_answer=True,
                            is_correct_document=True,
                            no_answer=qa.get("is_impossible", False),
                            origin="gold-label",
                        )
                        labels.append(label)
@ -229,7 +228,6 @@ def _extract_docs_and_labels_from_dict(
                            document=cur_doc,
                            is_correct_answer=True,
                            is_correct_document=True,
                            no_answer=qa.get("is_impossible", False),
                            origin="gold-label",
                        )
                        labels.append(label)
@ -248,7 +246,6 @@ def _extract_docs_and_labels_from_dict(
                        document=s,
                        is_correct_answer=True,
                        is_correct_document=True,
                        no_answer=qa.get("is_impossible", False),
                        origin="gold-label",
                    )
--- a/haystack/schema.py
+++ b/haystack/schema.py
@ -494,7 +494,6 @@ class Label:
    is_correct_document: bool
    origin: Literal["user-feedback", "gold-label"]
    answer: Optional[Answer] = None
    no_answer: Optional[bool] = None
    pipeline_id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
@ -512,7 +511,6 @@ class Label:
        origin: Literal["user-feedback", "gold-label"],
        answer: Optional[Answer],
        id: Optional[str] = None,
        no_answer: Optional[bool] = None,
        pipeline_id: Optional[str] = None,
        created_at: Optional[str] = None,
        updated_at: Optional[str] = None,
@ -533,7 +531,6 @@ class Label:
                                    the returned document was correct.
        :param origin: the source for the labels. It can be used later for filtering.
        :param id: Unique ID used within the DocumentStore. If not supplied, a uuid will be generated automatically.
        :param no_answer: whether the question is unanswerable.
        :param pipeline_id: pipeline identifier (any str) that was involved for generating this label (in-case of user feedback).
        :param created_at: Timestamp of creation with format yyyy-MM-dd HH:mm:ss.
                           Generate in Python via time.strftime("%Y-%m-%d %H:%M:%S").
@ -571,23 +568,6 @@ class Label:
        self.is_correct_document = is_correct_document
        self.origin = origin
        # If an Answer is provided we need to make sure that it's consistent with the `no_answer` value
        # TODO: reassess if we want to enforce Span.start=0 and Span.end=0 for no_answer=True
        if self.answer is not None:
            if no_answer == True:
                if self.answer.answer != "" or self.answer.context:
                    raise ValueError(f"Got no_answer == True while there seems to be an possible Answer: {self.answer}")
            elif no_answer == False:
                if self.answer.answer == "":
                    raise ValueError(
                        f"Got no_answer == False while there seems to be no possible Answer: {self.answer}"
                    )
            else:
                # Automatically infer no_answer from Answer object
                no_answer = self.answer.answer == "" or self.answer.answer is None
        self.no_answer = no_answer
        # TODO autofill answer.document_id if Document is provided
        self.pipeline_id = pipeline_id
@ -597,6 +577,13 @@ class Label:
            self.meta = meta
        self.filters = filters
    @property
    def no_answer(self) -> Optional[bool]:
        """
        Tell whether this label marks its question as unanswerable.

        The value is derived from the attached answer on each access rather than being stored.

        :return: None when no answer object is attached; True when the answer text is None or
                 is empty after stripping whitespace; False otherwise.
        """
        label_answer = self.answer
        # Without an answer object there is nothing to derive the flag from.
        if label_answer is None:
            return None
        answer_text = label_answer.answer
        if answer_text is None:
            return True
        return answer_text.strip() == ""
    def to_dict(self):
        """
        Convert this object into a dictionary.

        :return: the result of calling `asdict` on this instance.
        """
        as_mapping = asdict(self)
        return as_mapping
@ -657,7 +644,6 @@ class MultiLabel:
    labels: List[Label]
    query: str
    answers: List[str]
    no_answer: bool
    document_ids: List[str]
    contexts: List[str]
    offsets_in_contexts: List[Dict]
--- a/haystack/utils/deepsetcloud.py
+++ b/haystack/utils/deepsetcloud.py
@ -830,7 +830,6 @@ class EvaluationSetClient:
                origin="user-feedback",
                answer=Answer(label_dict["answer"]),
                id=label_dict["label_id"],
                no_answer=False if label_dict.get("answer", None) else True,
                pipeline_id=None,
                created_at=None,
                updated_at=None,
--- a/test/document_stores/test_document_store.py
+++ b/test/document_stores/test_document_store.py
@ -801,7 +801,6 @@ def test_labels(document_store: BaseDocumentStore):
        is_correct_answer=True,
        is_correct_document=True,
        document=Document(content="something", id="123"),
        no_answer=False,
        origin="gold-label",
    )
    document_store.write_labels([label])
@ -833,7 +832,6 @@ def test_labels(document_store: BaseDocumentStore):
        is_correct_answer=True,
        is_correct_document=True,
        document=Document(content="something", id="324"),
        no_answer=False,
        origin="gold-label",
    )
    document_store.write_labels([label, label2])
@ -890,7 +888,6 @@ def test_multilabel(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        # different answer in same doc
@ -901,7 +898,6 @@ def test_multilabel(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        # answer in different doc
@ -912,7 +908,6 @@ def test_multilabel(document_store: BaseDocumentStore):
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        # 'no answer', should be excluded from MultiLabel
@ -923,7 +918,6 @@ def test_multilabel(document_store: BaseDocumentStore):
            document=Document(content="some", id="777"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=True,
            origin="gold-label",
        ),
        # is_correct_answer=False, should be excluded from MultiLabel if "drop_negatives = True"
@ -934,7 +928,6 @@ def test_multilabel(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=False,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
    ]
@ -995,7 +988,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="some", id="777"),
            no_answer=True,
            origin="gold-label",
        ),
        # no answer in different doc
@ -1005,7 +997,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="some", id="123"),
            no_answer=True,
            origin="gold-label",
        ),
        # no answer in same doc, should be excluded
@ -1015,7 +1006,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="some", id="777"),
            no_answer=True,
            origin="gold-label",
        ),
        # no answer with is_correct_answer=False, should be excluded
@ -1025,7 +1015,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
            is_correct_answer=False,
            is_correct_document=True,
            document=Document(content="some", id="777"),
            no_answer=True,
            origin="gold-label",
        ),
    ]
@ -1065,7 +1054,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            filters={"name": ["123"]},
        ),
@ -1077,7 +1065,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            filters={"name": ["123"]},
        ),
@ -1089,7 +1076,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            filters={"name": ["333"]},
        ),
@ -1101,7 +1087,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="777"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=True,
            origin="gold-label",
            filters={"name": ["777"]},
        ),
@ -1113,7 +1098,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=False,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            filters={"name": ["123"]},
        ),
@ -1157,7 +1141,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            meta={"file_id": ["123"]},
        ),
@ -1169,7 +1152,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            meta={"file_id": ["123"]},
        ),
@ -1181,7 +1163,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            meta={"file_id": ["333"]},
        ),
@ -1193,7 +1174,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="777"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=True,
            origin="gold-label",
            meta={"file_id": ["777"]},
        ),
@ -1205,7 +1185,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
            meta={"file_id": ["888"]},
        ),
@ -1903,7 +1882,6 @@ def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_clo
            origin="user-feedback",
            answer=Answer("biggest city in germany"),
            id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
            no_answer=False,
            pipeline_id=None,
            created_at=None,
            updated_at=None,
--- a/test/others/test_schema.py
+++ b/test/others/test_schema.py
@ -61,7 +61,6 @@ def test_no_answer_label():
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="some", id="777"),
            no_answer=True,
            origin="gold-label",
        ),
        Label(
@ -78,7 +77,6 @@ def test_no_answer_label():
            is_correct_answer=True,
            is_correct_document=True,
            document=Document(content="some", id="777"),
            no_answer=False,
            origin="gold-label",
        ),
    ]
@ -249,7 +247,6 @@ def test_multilabel_preserve_order():
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -259,7 +256,6 @@ def test_multilabel_preserve_order():
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -269,7 +265,6 @@ def test_multilabel_preserve_order():
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -279,7 +274,6 @@ def test_multilabel_preserve_order():
            document=Document(content="some", id="777"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=True,
            origin="gold-label",
        ),
        Label(
@ -289,7 +283,6 @@ def test_multilabel_preserve_order():
            document=Document(content="some", id="123"),
            is_correct_answer=False,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
    ]
@ -309,7 +302,6 @@ def test_multilabel_preserve_order_w_duplicates():
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -319,7 +311,6 @@ def test_multilabel_preserve_order_w_duplicates():
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -329,7 +320,6 @@ def test_multilabel_preserve_order_w_duplicates():
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -339,7 +329,6 @@ def test_multilabel_preserve_order_w_duplicates():
            document=Document(content="some", id="123"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
        Label(
@ -349,7 +338,6 @@ def test_multilabel_preserve_order_w_duplicates():
            document=Document(content="some other", id="333"),
            is_correct_answer=True,
            is_correct_document=True,
            no_answer=False,
            origin="gold-label",
        ),
    ]