mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-25 05:58:57 +00:00
Limit generator tests to memory doc store; split pipeline tests (#1602)
* Limit generator tests to memory doc store; split pipeline tests * Add latest docstring and tutorial changes Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
parent
5cfdabda2c
commit
5ec29a5283
@ -57,7 +57,6 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial
|
||||
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
|
||||
|
||||
from haystack.retriever.dense import EmbeddingRetriever
|
||||
from haystack.utils import print_answers
|
||||
import pandas as pd
|
||||
import requests
|
||||
|
||||
|
||||
@ -416,6 +416,7 @@ def test_rag_token_generator(rag_generator):
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.generator
|
||||
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
||||
@pytest.mark.parametrize("retriever", ["embedding"], indirect=True)
|
||||
def test_generator_pipeline(document_store, retriever, rag_generator):
|
||||
document_store.write_documents(DOCS_WITH_EMBEDDINGS)
|
||||
@ -429,6 +430,7 @@ def test_generator_pipeline(document_store, retriever, rag_generator):
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.generator
|
||||
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
||||
@pytest.mark.parametrize("retriever", ["retribert"], indirect=True)
|
||||
@pytest.mark.vector_dim(128)
|
||||
def test_lfqa_pipeline(document_store, retriever, eli5_generator):
|
||||
|
||||
@ -6,9 +6,7 @@ import pytest
|
||||
|
||||
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
|
||||
from haystack.pipeline import (
|
||||
TranslationWrapperPipeline,
|
||||
JoinDocuments,
|
||||
ExtractiveQAPipeline,
|
||||
Pipeline,
|
||||
FAQPipeline,
|
||||
DocumentSearchPipeline,
|
||||
@ -20,7 +18,7 @@ from haystack.pipeline import (
|
||||
from haystack.reader import FARMReader
|
||||
from haystack.retriever.dense import DensePassageRetriever
|
||||
from haystack.retriever.sparse import ElasticsearchRetriever
|
||||
from haystack.schema import Document, Answer
|
||||
from haystack.schema import Document
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
@ -259,74 +257,6 @@ def test_invalid_run_args():
|
||||
assert "Invalid parameter 'invalid' for the node 'ESRetriever'" in str(exc.value)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
def test_extractive_qa_answers(reader, retriever_with_docs, document_store_with_docs):
|
||||
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
prediction = pipeline.run(
|
||||
query="Who lives in Berlin?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
|
||||
)
|
||||
assert prediction is not None
|
||||
assert type(prediction["answers"][0]) == Answer
|
||||
assert prediction["query"] == "Who lives in Berlin?"
|
||||
assert prediction["answers"][0].answer == "Carla"
|
||||
assert prediction["answers"][0].score <= 1
|
||||
assert prediction["answers"][0].score >= 0
|
||||
assert prediction["answers"][0].meta["meta_field"] == "test1"
|
||||
assert (
|
||||
prediction["answers"][0].context == "My name is Carla and I live in Berlin"
|
||||
)
|
||||
|
||||
assert len(prediction["answers"]) == 3
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
def test_extractive_qa_answers_without_normalized_scores(reader_without_normalized_scores, retriever_with_docs):
|
||||
pipeline = ExtractiveQAPipeline(reader=reader_without_normalized_scores, retriever=retriever_with_docs)
|
||||
prediction = pipeline.run(
|
||||
query="Who lives in Berlin?", params={"Reader": {"top_k": 3}}
|
||||
)
|
||||
assert prediction is not None
|
||||
assert prediction["query"] == "Who lives in Berlin?"
|
||||
assert prediction["answers"][0].answer == "Carla"
|
||||
assert prediction["answers"][0].score <= 11
|
||||
assert prediction["answers"][0].score >= 10
|
||||
assert prediction["answers"][0].meta["meta_field"] == "test1"
|
||||
assert (
|
||||
prediction["answers"][0].context == "My name is Carla and I live in Berlin"
|
||||
)
|
||||
|
||||
assert len(prediction["answers"]) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
def test_extractive_qa_offsets(reader, retriever_with_docs):
|
||||
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
prediction = pipeline.run(query="Who lives in Berlin?", params={"Retriever": {"top_k": 5}})
|
||||
|
||||
start = prediction["answers"][0].offsets_in_context[0].start
|
||||
end = prediction["answers"][0].offsets_in_context[0].end
|
||||
|
||||
assert start == 11
|
||||
assert end == 16
|
||||
|
||||
assert (
|
||||
prediction["answers"][0].context[start:end]
|
||||
== prediction["answers"][0].answer
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
|
||||
pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
query = "testing finder"
|
||||
prediction = pipeline.run(query=query, params={"top_k": 1})
|
||||
assert prediction is not None
|
||||
assert len(prediction["answers"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"retriever,document_store",
|
||||
[
|
||||
@ -398,30 +328,6 @@ def test_document_search_pipeline(retriever, document_store):
|
||||
assert len(output["documents"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.slow
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
|
||||
def test_extractive_qa_answers_with_translator(
|
||||
reader, retriever_with_docs, en_to_de_translator, de_to_en_translator
|
||||
):
|
||||
base_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
|
||||
pipeline = TranslationWrapperPipeline(
|
||||
input_translator=de_to_en_translator,
|
||||
output_translator=en_to_de_translator,
|
||||
pipeline=base_pipeline,
|
||||
)
|
||||
|
||||
prediction = pipeline.run(query="Wer lebt in Berlin?", params={"Reader": {"top_k": 3}})
|
||||
assert prediction is not None
|
||||
assert prediction["query"] == "Wer lebt in Berlin?"
|
||||
assert "Carla" in prediction["answers"][0].answer
|
||||
assert prediction["answers"][0].score <= 1
|
||||
assert prediction["answers"][0].score >= 0
|
||||
assert prediction["answers"][0].meta["meta_field"] == "test1"
|
||||
assert (
|
||||
prediction["answers"][0].context == "My name is Carla and I live in Berlin"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.elasticsearch
|
||||
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
|
||||
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
|
||||
|
||||
100
test/test_pipeline_extractive_qa.py
Normal file
100
test/test_pipeline_extractive_qa.py
Normal file
@ -0,0 +1,100 @@
|
||||
import pytest
|
||||
|
||||
from haystack.pipeline import (
|
||||
TranslationWrapperPipeline,
|
||||
ExtractiveQAPipeline
|
||||
)
|
||||
|
||||
from haystack.schema import Answer
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers(reader, retriever_with_docs, document_store_with_docs):
    """End-to-end extractive QA: the top answer must be Carla with a normalized score."""
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(
        query="Who lives in Berlin?",
        params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
    )

    assert prediction is not None
    assert prediction["query"] == "Who lives in Berlin?"

    answers = prediction["answers"]
    top = answers[0]
    assert type(top) == Answer
    assert top.answer == "Carla"
    # Default reader normalizes scores into [0, 1].
    assert 0 <= top.score <= 1
    assert top.meta["meta_field"] == "test1"
    assert top.context == "My name is Carla and I live in Berlin"

    # Reader top_k caps the number of returned answers.
    assert len(answers) == 3
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_without_normalized_scores(reader_without_normalized_scores, retriever_with_docs):
    """Same QA flow but with raw (un-normalized) reader scores, expected in the 10–11 range."""
    pipeline = ExtractiveQAPipeline(reader=reader_without_normalized_scores, retriever=retriever_with_docs)
    prediction = pipeline.run(
        query="Who lives in Berlin?", params={"Reader": {"top_k": 3}}
    )

    assert prediction is not None
    assert prediction["query"] == "Who lives in Berlin?"

    top = prediction["answers"][0]
    assert top.answer == "Carla"
    # Raw logit-style scores: no normalization, so bounds differ from the [0, 1] case.
    assert 10 <= top.score <= 11
    assert top.meta["meta_field"] == "test1"
    assert top.context == "My name is Carla and I live in Berlin"

    assert len(prediction["answers"]) == 3
|
||||
|
||||
|
||||
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_offsets(reader, retriever_with_docs):
    """The reported context offsets must point exactly at the answer span."""
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Retriever": {"top_k": 5}})

    top = prediction["answers"][0]
    span = top.offsets_in_context[0]

    assert (span.start, span.end) == (11, 16)
    # Slicing the context with the offsets must reproduce the answer text itself.
    assert top.context[span.start:span.end] == top.answer
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
    """A global top_k of 1 must limit the pipeline to exactly one answer."""
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="testing finder", params={"top_k": 1})

    assert prediction is not None
    assert len(prediction["answers"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_with_translator(
    reader, retriever_with_docs, en_to_de_translator, de_to_en_translator
):
    """Wrap the QA pipeline in translators: German query in, answers drawn from English docs."""
    qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    pipeline = TranslationWrapperPipeline(
        input_translator=de_to_en_translator,
        output_translator=en_to_de_translator,
        pipeline=qa_pipeline,
    )

    prediction = pipeline.run(query="Wer lebt in Berlin?", params={"Reader": {"top_k": 3}})

    assert prediction is not None
    # The original (untranslated) query is echoed back.
    assert prediction["query"] == "Wer lebt in Berlin?"

    top = prediction["answers"][0]
    assert "Carla" in top.answer
    assert 0 <= top.score <= 1
    assert top.meta["meta_field"] == "test1"
    # Context stays in the source-document language (English).
    assert top.context == "My name is Carla and I live in Berlin"
|
||||
Loading…
x
Reference in New Issue
Block a user