fix: Deduplicate same Documents in isolated evaluation of Reader (#4114)

* Deduplicate same Documents in one MultiLabel

* Add tests

* Update label

* Update label

* Update test

* Update test

* Revert change to check CI

* Revert reversion

* Use deepcopy

* Update tests
This commit is contained in:
bogdankostic 2023-02-10 13:55:14 +01:00 committed by GitHub
parent 3c793e4edc
commit 05950719ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 9 deletions

View File

@ -118,9 +118,13 @@ class BaseReader(BaseComponent):
# run evaluation with labels as node inputs
if add_isolated_node_eval and labels is not None:
relevant_documents = [label.document for label in labels.labels]
# Filter out empty documents
relevant_documents = [d for d in relevant_documents if d.content.strip() != ""]
# This dict comprehension deduplicates same Documents in a MultiLabel based on their Document ID and
# filters out empty documents
relevant_documents = list(
{
label.document.id: label.document for label in labels.labels if label.document.content.strip() != ""
}.values()
)
results_label_input = predict(query=query, documents=relevant_documents, top_k=top_k)
# Add corresponding document_name and more meta data, if an answer contains the document_id
@ -174,10 +178,15 @@ class BaseReader(BaseComponent):
if add_isolated_node_eval and labels is not None:
relevant_documents = []
for labelx in labels:
# Filter out empty documents
relevant_docs_labelx = [
label.document for label in labelx.labels if label.document.content.strip() != ""
]
# This dict comprehension deduplicates same Documents in a MultiLabel based on their Document ID
# and filters out empty documents
relevant_docs_labelx = list(
{
label.document.id: label.document
for label in labelx.labels
if label.document.content.strip() != ""
}.values()
)
relevant_documents.append(relevant_docs_labelx)
results_label_input = predict_batch(queries=queries, documents=relevant_documents, top_k=top_k)

View File

@ -1,6 +1,7 @@
import logging
import pytest
import sys
from copy import deepcopy
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.nodes.preprocessor import PreProcessor
@ -1267,9 +1268,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
labels = deepcopy(EVAL_LABELS)
# Copy one of the labels and change only its answer, so that we have a label with a different answer but the same Document
label_copy = deepcopy(labels[0].labels[0])
label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
labels[0].labels.append(label_copy)
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
eval_result: EvaluationResult = pipeline.eval(
labels=EVAL_LABELS,
labels=labels,
sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
add_isolated_node_eval=True,
)
@ -1292,6 +1298,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
assert metrics_top_1["Reader"]["f1"] == 1.0
assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
# Check if same Document in MultiLabel got deduplicated
assert labels[0].labels[0].id == labels[0].labels[1].id
reader_eval_df = eval_result.node_results["Reader"]
isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)

View File

@ -1,6 +1,7 @@
import logging
import pytest
import sys
from copy import deepcopy
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from haystack.nodes.preprocessor import PreProcessor
@ -607,9 +608,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
labels = deepcopy(EVAL_LABELS)
# Copy one of the labels and change only its answer, so that we have a label with a different answer but the same Document
label_copy = deepcopy(labels[0].labels[0])
label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
labels[0].labels.append(label_copy)
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
eval_result: EvaluationResult = pipeline.eval_batch(
labels=EVAL_LABELS,
labels=labels,
sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
add_isolated_node_eval=True,
)
@ -632,6 +638,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
assert metrics_top_1["Reader"]["f1"] == 1.0
assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
# Check if same Document in MultiLabel got deduplicated
assert labels[0].labels[0].id == labels[0].labels[1].id
reader_eval_df = eval_result.node_results["Reader"]
isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)