mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-30 00:30:09 +00:00
fix: Deduplicate same Documents in isolated evaluation of Reader (#4114)
* Deduplicate same Documents in one MultiLabel * Add tests * Update label * Update label * Update test * Update test * Revert change to check CI * Revert reversion * Use deepcopy * Update tests
This commit is contained in:
parent
3c793e4edc
commit
05950719ba
@ -118,9 +118,13 @@ class BaseReader(BaseComponent):
|
||||
|
||||
# run evaluation with labels as node inputs
|
||||
if add_isolated_node_eval and labels is not None:
|
||||
relevant_documents = [label.document for label in labels.labels]
|
||||
# Filter out empty documents
|
||||
relevant_documents = [d for d in relevant_documents if d.content.strip() != ""]
|
||||
# This dict comprehension deduplicates same Documents in a MultiLabel based on their Document ID and
|
||||
# filters out empty documents
|
||||
relevant_documents = list(
|
||||
{
|
||||
label.document.id: label.document for label in labels.labels if label.document.content.strip() != ""
|
||||
}.values()
|
||||
)
|
||||
results_label_input = predict(query=query, documents=relevant_documents, top_k=top_k)
|
||||
|
||||
# Add corresponding document_name and more meta data, if an answer contains the document_id
|
||||
@ -174,10 +178,15 @@ class BaseReader(BaseComponent):
|
||||
if add_isolated_node_eval and labels is not None:
|
||||
relevant_documents = []
|
||||
for labelx in labels:
|
||||
# Filter out empty documents
|
||||
relevant_docs_labelx = [
|
||||
label.document for label in labelx.labels if label.document.content.strip() != ""
|
||||
]
|
||||
# This dict comprehension deduplicates same Documents in a MultiLabel based on their Document ID
|
||||
# and filters out empty documents
|
||||
relevant_docs_labelx = list(
|
||||
{
|
||||
label.document.id: label.document
|
||||
for label in labelx.labels
|
||||
if label.document.content.strip() != ""
|
||||
}.values()
|
||||
)
|
||||
relevant_documents.append(relevant_docs_labelx)
|
||||
results_label_input = predict_batch(queries=queries, documents=relevant_documents, top_k=top_k)
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import pytest
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from haystack.document_stores.memory import InMemoryDocumentStore
|
||||
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
|
||||
from haystack.nodes.preprocessor import PreProcessor
|
||||
@ -1267,9 +1268,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
|
||||
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
|
||||
def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
|
||||
labels = deepcopy(EVAL_LABELS)
|
||||
# Copy one of the labels and change only its answer, so that we have a label with a different answer but the same Document
|
||||
label_copy = deepcopy(labels[0].labels[0])
|
||||
label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
|
||||
labels[0].labels.append(label_copy)
|
||||
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
eval_result: EvaluationResult = pipeline.eval(
|
||||
labels=EVAL_LABELS,
|
||||
labels=labels,
|
||||
sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
|
||||
add_isolated_node_eval=True,
|
||||
)
|
||||
@ -1292,6 +1298,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
|
||||
assert metrics_top_1["Reader"]["f1"] == 1.0
|
||||
assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
|
||||
|
||||
# Check if same Document in MultiLabel got deduplicated
|
||||
assert labels[0].labels[0].id == labels[0].labels[1].id
|
||||
reader_eval_df = eval_result.node_results["Reader"]
|
||||
isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
|
||||
assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
|
||||
|
||||
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import pytest
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from haystack.document_stores.memory import InMemoryDocumentStore
|
||||
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
|
||||
from haystack.nodes.preprocessor import PreProcessor
|
||||
@ -607,9 +608,14 @@ def test_extractive_qa_eval_simulated_top_k_reader_and_retriever(reader, retriev
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
|
||||
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
|
||||
def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
|
||||
labels = deepcopy(EVAL_LABELS)
|
||||
# Copy one of the labels and change only its answer, so that we have a label with a different answer but the same Document
|
||||
label_copy = deepcopy(labels[0].labels[0])
|
||||
label_copy.answer = Answer(answer="I", offsets_in_context=[Span(21, 22)])
|
||||
labels[0].labels.append(label_copy)
|
||||
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
eval_result: EvaluationResult = pipeline.eval_batch(
|
||||
labels=EVAL_LABELS,
|
||||
labels=labels,
|
||||
sas_model_name_or_path="sentence-transformers/paraphrase-MiniLM-L3-v2",
|
||||
add_isolated_node_eval=True,
|
||||
)
|
||||
@ -632,6 +638,12 @@ def test_extractive_qa_eval_isolated(reader, retriever_with_docs):
|
||||
assert metrics_top_1["Reader"]["f1"] == 1.0
|
||||
assert metrics_top_1["Reader"]["sas"] == pytest.approx(1.0, abs=1e-4)
|
||||
|
||||
# Check if same Document in MultiLabel got deduplicated
|
||||
assert labels[0].labels[0].id == labels[0].labels[1].id
|
||||
reader_eval_df = eval_result.node_results["Reader"]
|
||||
isolated_reader_eval_df = reader_eval_df[reader_eval_df["eval_mode"] == "isolated"]
|
||||
assert len(isolated_reader_eval_df) == len(labels) * reader.top_k_per_candidate
|
||||
|
||||
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user