Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-11-10 23:04:02 +00:00)
Fix OOM in test_eval.py Windows CI (#1830)
* disable problematic eval tests for windows ci
* move standard pipeline eval tests to separate test file
* switch to elasticsearch documentstore to reduce inproc mem
* Revert "switch to elasticsearch documentstore to reduce inproc mem" (this reverts commit 7a75871909c3317a252dff3a4df17e99eff69d05)
* get retriever from conftest
* use smaller embedding model for summarizer
* use smaller summarizer model
* remove queries param from pipeline.eval()
* isolate problematic tests
* rename separate test file
* Add latest docstring and tutorial changes

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent 180c05365a
commit 9293a902d7
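The "get retriever from conftest" bullet above, together with the @pytest.mark.parametrize(..., indirect=True) markers used throughout this diff, relies on pytest's indirect parametrization: the string id (e.g. "embedding" or "tfidf") is handed to a fixture in conftest.py via request.param, and the fixture builds the actual object. Below is a minimal sketch of how such a fixture could look; the body is assumed for illustration, since only the fixture names and the EmbeddingRetriever arguments appear in this diff.

# conftest.py (sketch, not Haystack's actual conftest)
import pytest
from haystack.nodes.retriever.dense import EmbeddingRetriever

@pytest.fixture
def retriever_with_docs(request, document_store_with_docs):
    # request.param is the id supplied by
    # @pytest.mark.parametrize("retriever_with_docs", ["embedding"], indirect=True)
    if request.param == "embedding":
        return EmbeddingRetriever(
            document_store=document_store_with_docs,
            embedding_model="deepset/sentence_bert",  # arguments copied from the test body removed below
            use_gpu=False,
        )
    raise ValueError(f"Unknown retriever type: {request.param}")

With this pattern a test only declares the marker and receives a ready retriever_with_docs argument, so retriever construction lives in one place instead of being repeated inside each test.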
test/test_eval.py
@@ -1,17 +1,14 @@
 import pytest
 from haystack.document_stores.base import BaseDocumentStore
-from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
 from haystack.nodes.preprocessor import PreProcessor
 from haystack.nodes.evaluator import EvalAnswers, EvalDocuments
 from haystack.nodes.query_classifier.transformers import TransformersQueryClassifier
-from haystack.nodes.retriever.dense import DensePassageRetriever, EmbeddingRetriever
+from haystack.nodes.retriever.dense import DensePassageRetriever
 from haystack.nodes.retriever.sparse import ElasticsearchRetriever
-from haystack.document_stores.memory import InMemoryDocumentStore
 from haystack.pipelines.base import Pipeline
-from haystack.pipelines import ExtractiveQAPipeline, DocumentSearchPipeline, FAQPipeline, GenerativeQAPipeline, SearchSummarizationPipeline
-from haystack.pipelines.standard_pipelines import RetrieverQuestionGenerationPipeline, TranslationWrapperPipeline
+from haystack.pipelines import ExtractiveQAPipeline
+from haystack.pipelines.standard_pipelines import DocumentSearchPipeline, FAQPipeline, RetrieverQuestionGenerationPipeline, TranslationWrapperPipeline
 from haystack.schema import Answer, Document, EvaluationResult, Label, MultiLabel, Span
-from conftest import DOCS_WITH_EMBEDDINGS
 
 
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus"], indirect=True)
@@ -555,67 +552,6 @@ def test_document_search_calculate_metrics(retriever_with_docs):
     assert metrics["Retriever"]["precision"] == 1.0/6
 
 
-@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
-def test_generativeqa_calculate_metrics(document_store_with_docs: InMemoryDocumentStore, rag_generator):
-    retriever = EmbeddingRetriever(
-        document_store=document_store_with_docs,
-        embedding_model="deepset/sentence_bert",
-        use_gpu=False
-    )
-    document_store_with_docs.update_embeddings(retriever=retriever)
-    pipeline = GenerativeQAPipeline(generator=rag_generator, retriever=retriever)
-    eval_result: EvaluationResult = pipeline.eval(
-        labels=EVAL_LABELS,
-        params={"Retriever": {"top_k": 5}}
-    )
-
-    metrics = eval_result.calculate_metrics()
-
-    assert "Retriever" in eval_result
-    assert "Generator" in eval_result
-    assert len(eval_result) == 2
-
-    assert metrics["Retriever"]["mrr"] == 0.5
-    assert metrics["Retriever"]["map"] == 0.5
-    assert metrics["Retriever"]["recall_multi_hit"] == 0.5
-    assert metrics["Retriever"]["recall_single_hit"] == 0.5
-    assert metrics["Retriever"]["precision"] == 1.0/6
-    assert metrics["Generator"]["exact_match"] == 0.0
-    assert metrics["Generator"]["f1"] == 1.0/3
-
-
-@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
-def test_summarizer_calculate_metrics(document_store_with_docs: ElasticsearchDocumentStore, summarizer):
-    retriever = EmbeddingRetriever(
-        document_store=document_store_with_docs,
-        embedding_model="deepset/sentence_bert",
-        use_gpu=False
-    )
-    document_store_with_docs.update_embeddings(retriever=retriever)
-    pipeline = SearchSummarizationPipeline(retriever=retriever, summarizer=summarizer, return_in_answer_format=True)
-    eval_result: EvaluationResult = pipeline.eval(
-        labels=EVAL_LABELS,
-        params={"Retriever": {"top_k": 5}}
-    )
-
-    metrics = eval_result.calculate_metrics()
-
-    assert "Retriever" in eval_result
-    assert "Summarizer" in eval_result
-    assert len(eval_result) == 2
-
-    assert metrics["Retriever"]["mrr"] == 0.5
-    assert metrics["Retriever"]["map"] == 0.5
-    assert metrics["Retriever"]["recall_multi_hit"] == 0.5
-    assert metrics["Retriever"]["recall_single_hit"] == 0.5
-    assert metrics["Retriever"]["precision"] == 1.0/6
-    assert metrics["Summarizer"]["mrr"] == 0.5
-    assert metrics["Summarizer"]["map"] == 0.5
-    assert metrics["Summarizer"]["recall_multi_hit"] == 0.5
-    assert metrics["Summarizer"]["recall_single_hit"] == 0.5
-    assert metrics["Summarizer"]["precision"] == 1.0/6
-
-
 @pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
 @pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
 def test_faq_calculate_metrics(retriever_with_docs):
test/test_eval_embedding_retriever.py (new file, 72 lines)
@@ -0,0 +1,72 @@
+import pytest
+from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
+from haystack.nodes.retriever.dense import EmbeddingRetriever
+from haystack.document_stores.memory import InMemoryDocumentStore
+from haystack.nodes.summarizer.transformers import TransformersSummarizer
+from haystack.pipelines import GenerativeQAPipeline, SearchSummarizationPipeline
+from haystack.schema import EvaluationResult
+from test_eval import EVAL_LABELS
+
+
+# had to be separated from other eval tests to work around OOM in Windows CI
+
+@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
+@pytest.mark.parametrize("retriever_with_docs", ["embedding"], indirect=True)
+def test_generativeqa_calculate_metrics(document_store_with_docs: InMemoryDocumentStore, rag_generator, retriever_with_docs):
+    document_store_with_docs.update_embeddings(retriever=retriever_with_docs)
+    pipeline = GenerativeQAPipeline(generator=rag_generator, retriever=retriever_with_docs)
+    eval_result: EvaluationResult = pipeline.eval(
+        labels=EVAL_LABELS,
+        params={"Retriever": {"top_k": 5}}
+    )
+
+    metrics = eval_result.calculate_metrics()
+
+    assert "Retriever" in eval_result
+    assert "Generator" in eval_result
+    assert len(eval_result) == 2
+
+    assert metrics["Retriever"]["mrr"] == 0.5
+    assert metrics["Retriever"]["map"] == 0.5
+    assert metrics["Retriever"]["recall_multi_hit"] == 0.5
+    assert metrics["Retriever"]["recall_single_hit"] == 0.5
+    assert metrics["Retriever"]["precision"] == 1.0/6
+    assert metrics["Generator"]["exact_match"] == 0.0
+    assert metrics["Generator"]["f1"] == 1.0/3
+
+
+@pytest.mark.parametrize("document_store_with_docs", ["memory"], indirect=True)
+def test_summarizer_calculate_metrics(document_store_with_docs: ElasticsearchDocumentStore):
+    summarizer = TransformersSummarizer(
+        model_name_or_path="sshleifer/distill-pegasus-xsum-16-4",
+        use_gpu=False
+    )
+    document_store_with_docs.embedding_dim = 384
+    retriever = EmbeddingRetriever(
+        document_store=document_store_with_docs,
+        embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+        use_gpu=False
+    )
+    document_store_with_docs.update_embeddings(retriever=retriever)
+    pipeline = SearchSummarizationPipeline(retriever=retriever, summarizer=summarizer, return_in_answer_format=True)
+    eval_result: EvaluationResult = pipeline.eval(
+        labels=EVAL_LABELS,
+        params={"Retriever": {"top_k": 5}}
+    )
+
+    metrics = eval_result.calculate_metrics()
+
+    assert "Retriever" in eval_result
+    assert "Summarizer" in eval_result
+    assert len(eval_result) == 2
+
+    assert metrics["Retriever"]["mrr"] == 0.5
+    assert metrics["Retriever"]["map"] == 0.5
+    assert metrics["Retriever"]["recall_multi_hit"] == 0.5
+    assert metrics["Retriever"]["recall_single_hit"] == 0.5
+    assert metrics["Retriever"]["precision"] == 1.0/6
+    assert metrics["Summarizer"]["mrr"] == 0.0
+    assert metrics["Summarizer"]["map"] == 0.0
+    assert metrics["Summarizer"]["recall_multi_hit"] == 0.0
+    assert metrics["Summarizer"]["recall_single_hit"] == 0.0
+    assert metrics["Summarizer"]["precision"] == 0.0
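The new module imports EVAL_LABELS from test_eval, a constant this diff does not show. As a purely hypothetical sketch of its shape (the query, answer, and document values below are invented; only the schema types Answer, Document, Label, MultiLabel, and Span are confirmed by test_eval.py's imports):

# Hypothetical shape of EVAL_LABELS; the real constant lives in test_eval.py
# and is not part of this diff. All field values here are invented.
from haystack.schema import Answer, Document, Label, MultiLabel, Span

EVAL_LABELS = [
    MultiLabel(labels=[Label(
        query="Who lives in Berlin?",  # invented example query
        answer=Answer(answer="Carla", offsets_in_context=[Span(11, 16)]),
        document=Document(content="My name is Carla and I live in Berlin", content_type="text"),
        is_correct_answer=True,
        is_correct_document=True,
        origin="gold-label",
    )]),
    # ... further MultiLabel entries; the recurring 0.5 retriever metrics in
    # these tests suggest two gold labels, one of which is retrieved within top_k
]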