Aggregate multiple no answers in MultiLabel (#324)

* Aggregate multiple no answers

* Add test for multiple no answers
This commit is contained in:
bogdankostic 2020-08-18 18:25:01 +02:00 committed by GitHub
parent 3a95fe2006
commit f388ca025c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 63 additions and 3 deletions

View File

@ -255,9 +255,6 @@ class BaseDocumentStore(ABC):
f"Both text label and 'no answer possible' label is present for question: {ls[0].question}")
for remove_idx in no_idx[::-1]:
ls.pop(remove_idx)
# when all labels to a question say "no answer" we just need the first occurrence
elif no_present and not t_present:
ls = ls[:1]
# construct Aggregated_label
for i, l in enumerate(ls):

View File

@ -171,6 +171,69 @@ def test_multilabel(document_store):
document_store.delete_all_documents(index="haystack_test_multilabel")
def test_multilabel_no_answer(document_store):
    """Check how several "no answer" labels for one question are aggregated.

    Four "no answer" labels for the same question are written to a dedicated
    index. The assertions expect a single aggregated label that covers two
    document ids, while all four raw labels remain retrievable.
    """
    index = "haystack_test_multilabel_no_answer"

    # (document_id, is_correct_answer) for each label, in write order
    label_specs = [
        ("777", True),
        # no answer in different doc
        ("123", True),
        # no answer in same doc, should be excluded
        ("777", True),
        # no answer with is_correct_answer=False, should be excluded
        ("321", False),
    ]
    written = [
        Label(
            question="question",
            answer="",
            is_correct_answer=answer_is_correct,
            is_correct_document=True,
            document_id=doc_id,
            offset_start_in_doc=0,
            no_answer=True,
            origin="gold_label",
        )
        for doc_id, answer_is_correct in label_specs
    ]
    document_store.write_labels(written, index=index)

    aggregated = document_store.get_all_labels_aggregated(index=index)
    stored = document_store.get_all_labels(index=index)

    assert len(aggregated) == 1
    assert len(stored) == 4

    merged = aggregated[0]
    assert len(merged.multiple_document_ids) == 2
    # the parallel lists of the aggregated label must stay the same length
    assert (
        len(merged.multiple_answers)
        == len(merged.multiple_document_ids)
        == len(merged.multiple_offset_start_in_docs)
    )

    # clean up
    document_store.delete_all_documents(index=index)
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_elasticsearch_update_meta(document_store_with_docs):
document = document_store_with_docs.query(query=None, filters={"name": ["filename1"]})[0]