mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-27 19:00:35 +00:00

* set fixture scope to "function" * run FARMReader without multiprocessing * dispose off ray after tests * run most expensive tasks first in test files * run expensive tests first * run garbage collector between tests Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
137 lines
5.4 KiB
Python
137 lines
5.4 KiB
Python
import sys
|
|
from typing import List
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from haystack.schema import Document
|
|
from haystack.nodes.answer_generator import Seq2SeqGenerator
|
|
from haystack.pipelines import TranslationWrapperPipeline, GenerativeQAPipeline
|
|
|
|
|
|
from conftest import DOCS_WITH_EMBEDDINGS
|
|
|
|
|
|
# Keeping few (retriever,document_store) combination to reduce test time
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
@pytest.mark.parametrize(
|
|
"retriever,document_store",
|
|
[("embedding", "memory")],
|
|
indirect=True,
|
|
)
|
|
def test_generator_pipeline_with_translator(
|
|
document_store,
|
|
retriever,
|
|
rag_generator,
|
|
en_to_de_translator,
|
|
de_to_en_translator
|
|
):
|
|
document_store.write_documents(DOCS_WITH_EMBEDDINGS)
|
|
query = "Was ist die Hauptstadt der Bundesrepublik Deutschland?"
|
|
base_pipeline = GenerativeQAPipeline(retriever=retriever, generator=rag_generator)
|
|
pipeline = TranslationWrapperPipeline(
|
|
input_translator=de_to_en_translator,
|
|
output_translator=en_to_de_translator,
|
|
pipeline=base_pipeline
|
|
)
|
|
output = pipeline.run(query=query, params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}})
|
|
answers = output["answers"]
|
|
assert len(answers) == 2
|
|
assert "berlin" in answers[0].answer
|
|
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
def test_rag_token_generator(rag_generator):
|
|
query = "What is capital of the Germany?"
|
|
generated_docs = rag_generator.predict(query=query, documents=DOCS_WITH_EMBEDDINGS, top_k=1)
|
|
answers = generated_docs["answers"]
|
|
assert len(answers) == 1
|
|
assert "berlin" in answers[0].answer
|
|
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
|
@pytest.mark.parametrize("retriever", ["embedding"], indirect=True)
|
|
def test_generator_pipeline(document_store, retriever, rag_generator):
|
|
document_store.write_documents(DOCS_WITH_EMBEDDINGS)
|
|
query = "What is capital of the Germany?"
|
|
pipeline = GenerativeQAPipeline(retriever=retriever, generator=rag_generator)
|
|
output = pipeline.run(query=query, params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}})
|
|
answers = output["answers"]
|
|
assert len(answers) == 2
|
|
assert "berlin" in answers[0].answer
|
|
|
|
|
|
@pytest.mark.skipif(sys.platform in ['win32', 'cygwin'], reason="Gives memory allocation error on windows runner")
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
|
@pytest.mark.parametrize("retriever", ["retribert"], indirect=True)
|
|
@pytest.mark.vector_dim(128)
|
|
def test_lfqa_pipeline(document_store, retriever, eli5_generator):
|
|
# reuse existing DOCS but regenerate embeddings with retribert
|
|
docs: List[Document] = []
|
|
for idx, d in enumerate(DOCS_WITH_EMBEDDINGS):
|
|
docs.append(Document(d.content, str(idx)))
|
|
document_store.write_documents(docs)
|
|
document_store.update_embeddings(retriever)
|
|
query = "Tell me about Berlin?"
|
|
pipeline = GenerativeQAPipeline(retriever=retriever, generator=eli5_generator)
|
|
output = pipeline.run(query=query, params={"top_k": 1})
|
|
answers = output["answers"]
|
|
assert len(answers) == 1
|
|
assert "Germany" in answers[0]
|
|
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
|
@pytest.mark.parametrize("retriever", ["retribert"], indirect=True)
|
|
@pytest.mark.vector_dim(128)
|
|
def test_lfqa_pipeline_unknown_converter(document_store, retriever):
|
|
# reuse existing DOCS but regenerate embeddings with retribert
|
|
docs: List[Document] = []
|
|
for idx, d in enumerate(DOCS_WITH_EMBEDDINGS):
|
|
docs.append(Document(d.content, str(idx)))
|
|
document_store.write_documents(docs)
|
|
document_store.update_embeddings(retriever)
|
|
seq2seq = Seq2SeqGenerator(model_name_or_path="patrickvonplaten/t5-tiny-random")
|
|
query = "Tell me about Berlin?"
|
|
pipeline = GenerativeQAPipeline(retriever=retriever, generator=seq2seq)
|
|
|
|
# raises exception as we don't have converter for "patrickvonplaten/t5-tiny-random" in Seq2SeqGenerator
|
|
with pytest.raises(Exception) as exception_info:
|
|
output = pipeline.run(query=query, params={"top_k": 1})
|
|
assert ("doesn\'t have input converter registered for patrickvonplaten/t5-tiny-random" in str(exception_info.value))
|
|
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.generator
|
|
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
|
|
@pytest.mark.parametrize("retriever", ["retribert"], indirect=True)
|
|
@pytest.mark.vector_dim(128)
|
|
def test_lfqa_pipeline_invalid_converter(document_store, retriever):
|
|
# reuse existing DOCS but regenerate embeddings with retribert
|
|
docs: List[Document] = []
|
|
for idx, d in enumerate(DOCS_WITH_EMBEDDINGS):
|
|
docs.append(Document(d.content, str(idx)))
|
|
document_store.write_documents(docs)
|
|
document_store.update_embeddings(retriever)
|
|
|
|
class _InvalidConverter:
|
|
|
|
def __call__(self, some_invalid_para: str, another_invalid_param: str) -> None:
|
|
pass
|
|
|
|
seq2seq = Seq2SeqGenerator(model_name_or_path="patrickvonplaten/t5-tiny-random", input_converter=_InvalidConverter())
|
|
query = "This query will fail due to InvalidConverter used"
|
|
pipeline = GenerativeQAPipeline(retriever=retriever, generator=seq2seq)
|
|
|
|
# raises exception as we are using invalid method signature in _InvalidConverter
|
|
with pytest.raises(Exception) as exception_info:
|
|
output = pipeline.run(query=query, params={"top_k": 1})
|
|
assert ("does not have a valid __call__ method signature" in str(exception_info.value))
|