refactor: Change no_answer attribute (#3411)

* always run validation

* update schemas

* no_answer as a property. break things!

* forgotten schema

* fix

* update openapi

* removed my unnecessary test

* fix sql document store

Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
Stefano Fiorucci 2022-10-25 13:07:00 +02:00 committed by GitHub
parent 6a422d588f
commit 54ec13eaf7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 7 additions and 69 deletions

View File

@ -876,10 +876,6 @@
"answer": { "answer": {
"$ref": "#/components/schemas/Answer" "$ref": "#/components/schemas/Answer"
}, },
"no_answer": {
"title": "No Answer",
"type": "boolean"
},
"pipeline_id": { "pipeline_id": {
"title": "Pipeline Id", "title": "Pipeline Id",
"type": "string" "type": "string"

View File

@ -876,10 +876,6 @@
"answer": { "answer": {
"$ref": "#/components/schemas/Answer" "$ref": "#/components/schemas/Answer"
}, },
"no_answer": {
"title": "No Answer",
"type": "boolean"
},
"pipeline_id": { "pipeline_id": {
"title": "Pipeline Id", "title": "Pipeline Id",
"type": "string" "type": "string"

View File

@ -1416,7 +1416,6 @@ class PineconeDocumentStore(BaseDocumentStore):
query=label_meta["query"], query=label_meta["query"],
document=doc, document=doc,
answer=answer, answer=answer,
no_answer=label_meta["label-no-answer"],
pipeline_id=label_meta["label-pipeline-id"], pipeline_id=label_meta["label-pipeline-id"],
created_at=label_meta["label-created-at"], created_at=label_meta["label-created-at"],
updated_at=label_meta["label-updated-at"], updated_at=label_meta["label-updated-at"],

View File

@ -586,7 +586,6 @@ class SQLDocumentStore(BaseDocumentStore):
is_correct_document=row.is_correct_document, is_correct_document=row.is_correct_document,
origin=row.origin, origin=row.origin,
id=row.id, id=row.id,
no_answer=row.no_answer,
pipeline_id=row.pipeline_id, pipeline_id=row.pipeline_id,
created_at=str(row.created_at), created_at=str(row.created_at),
updated_at=str(row.updated_at), updated_at=str(row.updated_at),

View File

@ -187,7 +187,6 @@ def _extract_docs_and_labels_from_dict(
document=None, # type: ignore document=None, # type: ignore
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=qa.get("is_impossible", False),
origin="gold-label", origin="gold-label",
) )
labels.append(label) labels.append(label)
@ -229,7 +228,6 @@ def _extract_docs_and_labels_from_dict(
document=cur_doc, document=cur_doc,
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=qa.get("is_impossible", False),
origin="gold-label", origin="gold-label",
) )
labels.append(label) labels.append(label)
@ -248,7 +246,6 @@ def _extract_docs_and_labels_from_dict(
document=s, document=s,
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=qa.get("is_impossible", False),
origin="gold-label", origin="gold-label",
) )

View File

@ -494,7 +494,6 @@ class Label:
is_correct_document: bool is_correct_document: bool
origin: Literal["user-feedback", "gold-label"] origin: Literal["user-feedback", "gold-label"]
answer: Optional[Answer] = None answer: Optional[Answer] = None
no_answer: Optional[bool] = None
pipeline_id: Optional[str] = None pipeline_id: Optional[str] = None
created_at: Optional[str] = None created_at: Optional[str] = None
updated_at: Optional[str] = None updated_at: Optional[str] = None
@ -512,7 +511,6 @@ class Label:
origin: Literal["user-feedback", "gold-label"], origin: Literal["user-feedback", "gold-label"],
answer: Optional[Answer], answer: Optional[Answer],
id: Optional[str] = None, id: Optional[str] = None,
no_answer: Optional[bool] = None,
pipeline_id: Optional[str] = None, pipeline_id: Optional[str] = None,
created_at: Optional[str] = None, created_at: Optional[str] = None,
updated_at: Optional[str] = None, updated_at: Optional[str] = None,
@ -533,7 +531,6 @@ class Label:
the returned document was correct. the returned document was correct.
:param origin: the source for the labels. It can be used later for filtering. :param origin: the source for the labels. It can be used later for filtering.
:param id: Unique ID used within the DocumentStore. If not supplied, a uuid will be generated automatically. :param id: Unique ID used within the DocumentStore. If not supplied, a uuid will be generated automatically.
:param no_answer: whether the question is unanswerable.
:param pipeline_id: pipeline identifier (any str) that was involved for generating this label (in-case of user feedback). :param pipeline_id: pipeline identifier (any str) that was involved for generating this label (in-case of user feedback).
:param created_at: Timestamp of creation with format yyyy-MM-dd HH:mm:ss. :param created_at: Timestamp of creation with format yyyy-MM-dd HH:mm:ss.
Generate in Python via time.strftime("%Y-%m-%d %H:%M:%S"). Generate in Python via time.strftime("%Y-%m-%d %H:%M:%S").
@ -571,23 +568,6 @@ class Label:
self.is_correct_document = is_correct_document self.is_correct_document = is_correct_document
self.origin = origin self.origin = origin
# If an Answer is provided we need to make sure that it's consistent with the `no_answer` value
# TODO: reassess if we want to enforce Span.start=0 and Span.end=0 for no_answer=True
if self.answer is not None:
if no_answer == True:
if self.answer.answer != "" or self.answer.context:
raise ValueError(f"Got no_answer == True while there seems to be an possible Answer: {self.answer}")
elif no_answer == False:
if self.answer.answer == "":
raise ValueError(
f"Got no_answer == False while there seems to be no possible Answer: {self.answer}"
)
else:
# Automatically infer no_answer from Answer object
no_answer = self.answer.answer == "" or self.answer.answer is None
self.no_answer = no_answer
# TODO autofill answer.document_id if Document is provided # TODO autofill answer.document_id if Document is provided
self.pipeline_id = pipeline_id self.pipeline_id = pipeline_id
@ -597,6 +577,13 @@ class Label:
self.meta = meta self.meta = meta
self.filters = filters self.filters = filters
@property
def no_answer(self) -> Optional[bool]:
    """
    Tell whether this label marks its question as unanswerable.

    The value is derived from ``self.answer`` on every access instead of being stored.

    :return: ``None`` if ``self.answer`` is ``None``; otherwise ``True`` when the answer's
             text is ``None`` or contains only whitespace, and ``False`` in all other cases.
    """
    # Without an Answer object there is nothing to derive the flag from.
    if self.answer is None:
        return None
    answer_text = self.answer.answer
    return answer_text is None or answer_text.strip() == ""
def to_dict(self): def to_dict(self):
return asdict(self) return asdict(self)
@ -657,7 +644,6 @@ class MultiLabel:
labels: List[Label] labels: List[Label]
query: str query: str
answers: List[str] answers: List[str]
no_answer: bool
document_ids: List[str] document_ids: List[str]
contexts: List[str] contexts: List[str]
offsets_in_contexts: List[Dict] offsets_in_contexts: List[Dict]

View File

@ -830,7 +830,6 @@ class EvaluationSetClient:
origin="user-feedback", origin="user-feedback",
answer=Answer(label_dict["answer"]), answer=Answer(label_dict["answer"]),
id=label_dict["label_id"], id=label_dict["label_id"],
no_answer=False if label_dict.get("answer", None) else True,
pipeline_id=None, pipeline_id=None,
created_at=None, created_at=None,
updated_at=None, updated_at=None,

View File

@ -801,7 +801,6 @@ def test_labels(document_store: BaseDocumentStore):
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="something", id="123"), document=Document(content="something", id="123"),
no_answer=False,
origin="gold-label", origin="gold-label",
) )
document_store.write_labels([label]) document_store.write_labels([label])
@ -833,7 +832,6 @@ def test_labels(document_store: BaseDocumentStore):
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="something", id="324"), document=Document(content="something", id="324"),
no_answer=False,
origin="gold-label", origin="gold-label",
) )
document_store.write_labels([label, label2]) document_store.write_labels([label, label2])
@ -890,7 +888,6 @@ def test_multilabel(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
# different answer in same doc # different answer in same doc
@ -901,7 +898,6 @@ def test_multilabel(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
# answer in different doc # answer in different doc
@ -912,7 +908,6 @@ def test_multilabel(document_store: BaseDocumentStore):
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
# 'no answer', should be excluded from MultiLabel # 'no answer', should be excluded from MultiLabel
@ -923,7 +918,6 @@ def test_multilabel(document_store: BaseDocumentStore):
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
# is_correct_answer=False, should be excluded from MultiLabel if "drop_negatives = True" # is_correct_answer=False, should be excluded from MultiLabel if "drop_negatives = True"
@ -934,7 +928,6 @@ def test_multilabel(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=False, is_correct_answer=False,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
] ]
@ -995,7 +988,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
# no answer in different doc # no answer in different doc
@ -1005,7 +997,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
# no answer in same doc, should be excluded # no answer in same doc, should be excluded
@ -1015,7 +1006,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
# no answer with is_correct_answer=False, should be excluded # no answer with is_correct_answer=False, should be excluded
@ -1025,7 +1015,6 @@ def test_multilabel_no_answer(document_store: BaseDocumentStore):
is_correct_answer=False, is_correct_answer=False,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
] ]
@ -1065,7 +1054,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
filters={"name": ["123"]}, filters={"name": ["123"]},
), ),
@ -1077,7 +1065,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
filters={"name": ["123"]}, filters={"name": ["123"]},
), ),
@ -1089,7 +1076,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
filters={"name": ["333"]}, filters={"name": ["333"]},
), ),
@ -1101,7 +1087,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=True,
origin="gold-label", origin="gold-label",
filters={"name": ["777"]}, filters={"name": ["777"]},
), ),
@ -1113,7 +1098,6 @@ def test_multilabel_filter_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=False, is_correct_answer=False,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
filters={"name": ["123"]}, filters={"name": ["123"]},
), ),
@ -1157,7 +1141,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
meta={"file_id": ["123"]}, meta={"file_id": ["123"]},
), ),
@ -1169,7 +1152,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
meta={"file_id": ["123"]}, meta={"file_id": ["123"]},
), ),
@ -1181,7 +1163,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
meta={"file_id": ["333"]}, meta={"file_id": ["333"]},
), ),
@ -1193,7 +1174,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=True,
origin="gold-label", origin="gold-label",
meta={"file_id": ["777"]}, meta={"file_id": ["777"]},
), ),
@ -1205,7 +1185,6 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
meta={"file_id": ["888"]}, meta={"file_id": ["888"]},
), ),
@ -1903,7 +1882,6 @@ def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_clo
origin="user-feedback", origin="user-feedback",
answer=Answer("biggest city in germany"), answer=Answer("biggest city in germany"),
id="3fa85f64-5717-4562-b3fc-2c963f66afa6", id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
no_answer=False,
pipeline_id=None, pipeline_id=None,
created_at=None, created_at=None,
updated_at=None, updated_at=None,

View File

@ -61,7 +61,6 @@ def test_no_answer_label():
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -78,7 +77,6 @@ def test_no_answer_label():
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
] ]
@ -249,7 +247,6 @@ def test_multilabel_preserve_order():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -259,7 +256,6 @@ def test_multilabel_preserve_order():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -269,7 +265,6 @@ def test_multilabel_preserve_order():
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -279,7 +274,6 @@ def test_multilabel_preserve_order():
document=Document(content="some", id="777"), document=Document(content="some", id="777"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=True,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -289,7 +283,6 @@ def test_multilabel_preserve_order():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=False, is_correct_answer=False,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
] ]
@ -309,7 +302,6 @@ def test_multilabel_preserve_order_w_duplicates():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -319,7 +311,6 @@ def test_multilabel_preserve_order_w_duplicates():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -329,7 +320,6 @@ def test_multilabel_preserve_order_w_duplicates():
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -339,7 +329,6 @@ def test_multilabel_preserve_order_w_duplicates():
document=Document(content="some", id="123"), document=Document(content="some", id="123"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
Label( Label(
@ -349,7 +338,6 @@ def test_multilabel_preserve_order_w_duplicates():
document=Document(content="some other", id="333"), document=Document(content="some other", id="333"),
is_correct_answer=True, is_correct_answer=True,
is_correct_document=True, is_correct_document=True,
no_answer=False,
origin="gold-label", origin="gold-label",
), ),
] ]