mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-12-26 14:38:36 +00:00
test: mock all Translator tests and move one to e2e (#4290)
* mock all translator tests and move one to e2e * typo * extract pipeline tests using translator * remove duplicate test * move generator test in e2e * Update e2e/pipelines/test_extractive_qa.py * pytest.mark.unit * black * remove model name as well * remove unused fixture * rename original and improve pipeline tests * fixes * pylint
This commit is contained in:
parent
7e0f9715ba
commit
165a0a5faa
0
e2e/__init__.py
Normal file
0
e2e/__init__.py
Normal file
@ -1,6 +1,7 @@
|
||||
import os
|
||||
import uuid
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
@ -16,6 +17,9 @@ from haystack.document_stores import (
|
||||
)
|
||||
|
||||
|
||||
SAMPLES_PATH = Path(__file__).parent.parent / "test" / "samples"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def docs_all_formats():
|
||||
return [
|
||||
@ -6,7 +6,7 @@ import numpy as np
|
||||
|
||||
from haystack.schema import Document
|
||||
|
||||
from .conftest import document_store
|
||||
from ..conftest import document_store
|
||||
|
||||
|
||||
DOCUMENTS = [
|
||||
|
||||
@ -3,7 +3,7 @@ import pytest
|
||||
from haystack.nodes import EmbeddingRetriever
|
||||
from haystack.pipelines import DocumentSearchPipeline
|
||||
|
||||
from .conftest import document_store
|
||||
from ..conftest import document_store
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["memory", "faiss", "milvus", "weaviate", "elasticsearch"])
|
||||
|
||||
@ -4,7 +4,7 @@ import pandas as pd
|
||||
|
||||
from haystack.nodes import EmbeddingRetriever, TableTextRetriever
|
||||
|
||||
from .conftest import document_store
|
||||
from ..conftest import document_store
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", ["elasticsearch", "faiss", "memory", "milvus"])
|
||||
|
||||
0
e2e/nodes/__init__.py
Normal file
0
e2e/nodes/__init__.py
Normal file
13
e2e/nodes/test_translator.py
Normal file
13
e2e/nodes/test_translator.py
Normal file
@ -0,0 +1,13 @@
|
||||
from haystack import Document
|
||||
from haystack.nodes import TransformersTranslator
|
||||
|
||||
|
||||
def test_translator():
    """Translate one English sentence with the ``Helsinki-NLP/opus-mt-en-de`` checkpoint.

    The same sentence is submitted three ways (as a query, as a list of plain
    strings and as a list of ``Document`` objects) and every call is expected
    to produce the same German text.
    """
    translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    source_text = "I live in Berlin"
    expected_text = "Ich lebe in Berlin"

    # Query input: the translated text is returned directly.
    from_query = translator.translate(query=source_text)
    assert from_query == expected_text

    # List-of-strings input: the first item of the result is compared.
    from_strings = translator.translate(documents=[source_text])
    assert from_strings[0] == expected_text

    # List-of-Document input: the translation is read from ``content``.
    from_documents = translator.translate(documents=[Document(content=source_text)])
    assert from_documents[0].content == expected_text
|
||||
0
e2e/pipelines/__init__.py
Normal file
0
e2e/pipelines/__init__.py
Normal file
29
e2e/pipelines/test_extractive_qa.py
Normal file
29
e2e/pipelines/test_extractive_qa.py
Normal file
@ -0,0 +1,29 @@
|
||||
from haystack.nodes import TransformersTranslator, FARMReader, TfidfRetriever
|
||||
from haystack.pipelines import ExtractiveQAPipeline, TranslationWrapperPipeline
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
|
||||
|
||||
def test_extractive_qa_answers_with_translator(docs):
    """Ask a German question against an extractive QA pipeline wrapped in translators.

    The ``docs`` fixture is written to an in-memory store, a TF-IDF retriever
    and a FARM reader form the base pipeline, and a ``TranslationWrapperPipeline``
    translates the query on the way in (de->en) and the output on the way out (en->de).
    """
    to_german = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    to_english = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en")

    document_store = InMemoryDocumentStore(use_bm25=False)
    tfidf_retriever = TfidfRetriever(document_store=document_store)
    farm_reader = FARMReader(
        model_name_or_path="deepset/bert-medium-squad2-distilled", use_gpu=False, top_k_per_sample=5, num_processes=0
    )
    document_store.write_documents(docs)

    qa_pipeline = ExtractiveQAPipeline(reader=farm_reader, retriever=tfidf_retriever)
    translated_pipeline = TranslationWrapperPipeline(
        input_translator=to_english, output_translator=to_german, pipeline=qa_pipeline
    )

    german_query = "Wer lebt in Berlin?"
    prediction = translated_pipeline.run(query=german_query, params={"Reader": {"top_k": 3}})

    assert prediction is not None
    # The query is reported back exactly as it was asked.
    assert prediction["query"] == "Wer lebt in Berlin?"

    best_answer = prediction["answers"][0]
    assert "Carla" in best_answer.answer
    assert best_answer.score <= 1
    assert best_answer.score >= 0
    assert best_answer.meta["meta_field"] == "test1"
    assert best_answer.context == "My name is Carla and I live in Berlin"
|
||||
36
e2e/pipelines/test_generative_qa.py
Normal file
36
e2e/pipelines/test_generative_qa.py
Normal file
@ -0,0 +1,36 @@
|
||||
from haystack import Document
|
||||
from haystack.pipelines import TranslationWrapperPipeline, GenerativeQAPipeline
|
||||
from haystack.document_stores import InMemoryDocumentStore
|
||||
from haystack.nodes import DensePassageRetriever, RAGenerator, TransformersTranslator
|
||||
|
||||
|
||||
def test_generative_pipeline_with_translator():
    """Ask a German question against a RAG-based generative pipeline wrapped in translators.

    Two English documents about Berlin are indexed in an in-memory store, a DPR
    retriever and a ``RAGenerator`` form the base pipeline, and a
    ``TranslationWrapperPipeline`` applies de->en on input and en->de on output.
    """
    berlin_docs = [
        Document(content="The capital of Germany is the city state of Berlin."),
        Document(content="Berlin is the capital and largest city of Germany by both area and population."),
    ]
    document_store = InMemoryDocumentStore(use_bm25=True)
    document_store.write_documents(berlin_docs)

    # A DPR retriever is required here, otherwise RAGenerator will throw an exception.
    dpr_retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
        use_gpu=False,
        embed_title=True,
    )
    document_store.update_embeddings(retriever=dpr_retriever)

    generator = RAGenerator(
        model_name_or_path="facebook/rag-token-nq", generator_type="token", max_length=20, retriever=dpr_retriever
    )
    to_german = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    to_english = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en")

    german_query = "Was ist die Hauptstadt der Bundesrepublik Deutschland?"
    generative_pipeline = GenerativeQAPipeline(retriever=dpr_retriever, generator=generator)
    translated_pipeline = TranslationWrapperPipeline(
        input_translator=to_english, output_translator=to_german, pipeline=generative_pipeline
    )

    run_params = {"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}}
    result = translated_pipeline.run(query=german_query, params=run_params)

    generated_answers = result["answers"]
    assert len(generated_answers) == 2
    # Compared in lower case so the capitalisation of the generated text does not matter.
    assert "berlin" in generated_answers[0].answer.lower()
|
||||
@ -54,7 +54,6 @@ from haystack.nodes import (
|
||||
TableReader,
|
||||
RCIReader,
|
||||
TransformersSummarizer,
|
||||
TransformersTranslator,
|
||||
QuestionGenerator,
|
||||
PromptTemplate,
|
||||
)
|
||||
@ -555,16 +554,6 @@ def summarizer():
|
||||
return TransformersSummarizer(model_name_or_path="sshleifer/distilbart-xsum-12-6", use_gpu=False)
|
||||
|
||||
|
||||
@pytest.fixture
def en_to_de_translator():
    """Provide a translator built from the ``Helsinki-NLP/opus-mt-en-de`` checkpoint."""
    translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    return translator
|
||||
|
||||
|
||||
@pytest.fixture
def de_to_en_translator():
    """Provide a translator built from the ``Helsinki-NLP/opus-mt-de-en`` checkpoint."""
    translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en")
    return translator
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def reader_without_normalized_scores():
|
||||
return FARMReader(
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
import pytest
|
||||
|
||||
from haystack.pipelines import TranslationWrapperPipeline, ExtractiveQAPipeline
|
||||
from .test_summarizer import SPLIT_DOCS
|
||||
|
||||
|
||||
# Keeping few (retriever,document_store,reader) combination to reduce test time
|
||||
@pytest.mark.integration
@pytest.mark.summarizer
@pytest.mark.parametrize("retriever,document_store,reader", [("embedding", "memory", "farm")], indirect=True)
def test_extractive_qa_pipeline_with_translator(
    document_store, retriever, reader, en_to_de_translator, de_to_en_translator
):
    """Run a German query through an extractive QA pipeline wrapped in translators.

    ``SPLIT_DOCS`` is indexed and embedded, the query is translated de->en on
    the way in and the output en->de on the way out. The retriever is limited
    to two documents and "Frankreich" must be one of the returned answers.
    """
    document_store.write_documents(SPLIT_DOCS)
    document_store.update_embeddings(retriever=retriever)

    qa_pipeline = ExtractiveQAPipeline(retriever=retriever, reader=reader)
    translated_pipeline = TranslationWrapperPipeline(
        input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=qa_pipeline
    )

    german_query = "Wo steht der Eiffelturm?"
    result = translated_pipeline.run(query=german_query, params={"Retriever": {"top_k": 2}})
    assert len(result["documents"]) == 2

    # Collect the text of every answer before checking membership.
    answer_texts = []
    for candidate in result["answers"]:
        answer_texts.append(candidate.answer)
    assert "Frankreich" in answer_texts
|
||||
@ -6,32 +6,12 @@ import pytest
|
||||
|
||||
from haystack.schema import Document
|
||||
from haystack.nodes.answer_generator import Seq2SeqGenerator, OpenAIAnswerGenerator
|
||||
from haystack.pipelines import TranslationWrapperPipeline, GenerativeQAPipeline
|
||||
from haystack.pipelines import GenerativeQAPipeline
|
||||
from haystack.nodes import PromptTemplate
|
||||
|
||||
import logging
|
||||
|
||||
|
||||
# Keeping few (retriever,document_store) combination to reduce test time
|
||||
@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Causes OOM on windows github runner")
@pytest.mark.integration
@pytest.mark.generator
@pytest.mark.parametrize("retriever,document_store", [("embedding", "memory")], indirect=True)
def test_generator_pipeline_with_translator(
    document_store, retriever, rag_generator, en_to_de_translator, de_to_en_translator, docs_with_true_emb
):
    """Run a German query through a generative QA pipeline wrapped in translators.

    The documents from ``docs_with_true_emb`` are written to the store, the
    query is translated de->en on the way in and the output en->de on the way
    out. Two answers are requested and the first must mention Berlin.
    """
    document_store.write_documents(docs_with_true_emb)
    query = "Was ist die Hauptstadt der Bundesrepublik Deutschland?"
    base_pipeline = GenerativeQAPipeline(retriever=retriever, generator=rag_generator)
    pipeline = TranslationWrapperPipeline(
        input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=base_pipeline
    )
    output = pipeline.run(query=query, params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}})
    answers = output["answers"]
    assert len(answers) == 2
    # Compare case-insensitively: the answer is generated and then translated,
    # so its capitalisation ("berlin" vs "Berlin") is not something to pin.
    assert "berlin" in answers[0].answer.lower()
|
||||
|
||||
|
||||
@pytest.mark.integration
|
||||
@pytest.mark.generator
|
||||
def test_rag_token_generator(rag_generator, docs_with_true_emb):
|
||||
|
||||
@ -1,60 +1,115 @@
|
||||
from haystack.schema import Document
|
||||
|
||||
import pytest
|
||||
|
||||
EXPECTED_OUTPUT = "Ich lebe in Berlin"
|
||||
INPUT = "I live in Berlin"
|
||||
|
||||
DOCUMENT_INPUT = Document(content=INPUT)
|
||||
import haystack
|
||||
from haystack.schema import Document
|
||||
from haystack.nodes import TransformersTranslator
|
||||
|
||||
|
||||
ORIGINAL_TEXT = "TEST QUERY"
|
||||
TRANSLATION = "MOCK TRANSLATION"
|
||||
|
||||
|
||||
class MockTokenizer:
    """Stand-in for a tokenizer class that does no real tokenization.

    Every method accepts and ignores arbitrary arguments, so the object can be
    driven through a load / call / move / decode sequence without any model files.
    """

    @classmethod
    def from_pretrained(cls, *_args, **_kwargs):
        """Return a fresh instance; the arguments are ignored."""
        instance = cls()
        return instance

    def __call__(self, *_args, **_kwargs):
        """Return the tokenizer itself so that ``.to(...)`` can be chained on the result."""
        return self

    def to(self, *_args, **_kwargs):
        """Return an empty dict regardless of the arguments."""
        return {}

    def batch_decode(self, *_args, **_kwargs):
        """Return a one-element list holding the canned ``TRANSLATION`` string."""
        return [TRANSLATION]
|
||||
|
||||
|
||||
class MockModel:
    """Stand-in for a seq2seq model class that performs no computation.

    Every method accepts and ignores arbitrary arguments.
    """

    @classmethod
    def from_pretrained(cls, *_args, **_kwargs):
        """Return a fresh instance; the arguments are ignored."""
        instance = cls()
        return instance

    def generate(self, *_args, **_kwargs):
        """Return ``None`` instead of generated token ids."""
        return None

    def to(self, *_args, **_kwargs):
        """Return ``None``; there is nothing to move to a device."""
        return None
|
||||
|
||||
|
||||
@pytest.fixture
def mock_models(monkeypatch):
    """Replace the model and tokenizer classes on the translator module's ``transformers`` with the mocks."""
    replacements = (("AutoModelForSeq2SeqLM", MockModel), ("AutoTokenizer", MockTokenizer))
    for attribute_name, mock_class in replacements:
        monkeypatch.setattr(haystack.nodes.translator.transformers, attribute_name, mock_class)
|
||||
|
||||
|
||||
@pytest.fixture
def en_to_de_translator(mock_models) -> TransformersTranslator:
    """Provide a translator built while ``mock_models`` is active; the model name is a placeholder."""
    translator = TransformersTranslator(model_name_or_path="irrelevant/anyway")
    return translator
|
||||
|
||||
|
||||
@pytest.fixture
def de_to_en_translator(mock_models) -> TransformersTranslator:
    """Provide a translator built while ``mock_models`` is active; the model name is a placeholder."""
    translator = TransformersTranslator(model_name_or_path="irrelevant/anyway")
    return translator
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_query(en_to_de_translator):
|
||||
assert en_to_de_translator.translate(query=INPUT) == EXPECTED_OUTPUT
|
||||
assert en_to_de_translator.translate(query=ORIGINAL_TEXT) == TRANSLATION
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_list(en_to_de_translator):
|
||||
assert en_to_de_translator.translate(documents=[INPUT])[0] == EXPECTED_OUTPUT
|
||||
assert en_to_de_translator.translate(documents=[ORIGINAL_TEXT])[0] == TRANSLATION
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_document(en_to_de_translator):
|
||||
assert en_to_de_translator.translate(documents=[Document(content=INPUT)])[0].content == EXPECTED_OUTPUT
|
||||
assert en_to_de_translator.translate(documents=[Document(content=ORIGINAL_TEXT)])[0].content == TRANSLATION
|
||||
|
||||
|
||||
def test_translator_with_document_preserves_input(en_to_de_translator):
|
||||
original_document = Document(content=INPUT)
|
||||
en_to_de_translator.translate(documents=[original_document])[0] # pylint: disable=expression-not-assigned
|
||||
assert original_document.content == INPUT
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_document_preserves_original(en_to_de_translator):
|
||||
original_document = Document(content=ORIGINAL_TEXT)
|
||||
en_to_de_translator.translate(documents=[original_document])
|
||||
assert original_document.content == ORIGINAL_TEXT
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_dictionary(en_to_de_translator):
|
||||
assert en_to_de_translator.translate(documents=[{"content": INPUT}])[0]["content"] == EXPECTED_OUTPUT
|
||||
assert en_to_de_translator.translate(documents=[{"content": ORIGINAL_TEXT}])[0]["content"] == TRANSLATION
|
||||
|
||||
|
||||
def test_translator_with_dictionary_preserves_input(en_to_de_translator):
|
||||
original_document = {"content": INPUT}
|
||||
en_to_de_translator.translate(documents=[original_document])[0] # pylint: disable=expression-not-assigned
|
||||
assert original_document["content"] == INPUT
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_dictionary_preserves_original(en_to_de_translator):
|
||||
original_document = {"content": ORIGINAL_TEXT}
|
||||
en_to_de_translator.translate(documents=[original_document])
|
||||
assert original_document["content"] == ORIGINAL_TEXT
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_dictionary_with_dict_key(en_to_de_translator):
|
||||
assert en_to_de_translator.translate(documents=[{"key": INPUT}], dict_key="key")[0]["key"] == EXPECTED_OUTPUT
|
||||
assert en_to_de_translator.translate(documents=[{"key": ORIGINAL_TEXT}], dict_key="key")[0]["key"] == TRANSLATION
|
||||
|
||||
|
||||
def test_translator_with_empty_input(en_to_de_translator):
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_empty_original(en_to_de_translator):
|
||||
with pytest.raises(AttributeError):
|
||||
en_to_de_translator.translate()
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_query_and_documents(en_to_de_translator):
|
||||
with pytest.raises(AttributeError):
|
||||
en_to_de_translator.translate(query=INPUT, documents=[INPUT])
|
||||
en_to_de_translator.translate(query=ORIGINAL_TEXT, documents=[ORIGINAL_TEXT])
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_translator_with_dict_without_text_key(en_to_de_translator):
|
||||
with pytest.raises(AttributeError):
|
||||
en_to_de_translator.translate(documents=[{"text1": INPUT}])
|
||||
en_to_de_translator.translate(documents=[{"text1": ORIGINAL_TEXT}])
|
||||
|
||||
|
||||
@pytest.mark.unit
def test_translator_with_dict_with_non_string_value(en_to_de_translator):
    """Expect ``AttributeError`` when a dictionary document holds a non-string value under ``text``."""
    documents_with_int_value = [{"text": 123}]
    with pytest.raises(AttributeError):
        en_to_de_translator.translate(documents=documents_with_int_value)
|
||||
|
||||
@ -60,22 +60,3 @@ def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
|
||||
prediction = pipeline.run(query=query, params={"Retriever": {"top_k": 1}, "Reader": {"top_k": 1}})
|
||||
assert prediction is not None
|
||||
assert len(prediction["answers"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.integration
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_extractive_qa_answers_with_translator(reader, retriever_with_docs, en_to_de_translator, de_to_en_translator):
    """Ask a German question against an extractive QA pipeline wrapped in translators.

    The query is translated de->en before the base pipeline runs and the
    output en->de afterwards; the top answer is then checked field by field.
    """
    qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    translated_pipeline = TranslationWrapperPipeline(
        input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=qa_pipeline
    )

    german_query = "Wer lebt in Berlin?"
    prediction = translated_pipeline.run(query=german_query, params={"Reader": {"top_k": 3}})

    assert prediction is not None
    # The query is reported back exactly as it was asked.
    assert prediction["query"] == "Wer lebt in Berlin?"

    best_answer = prediction["answers"][0]
    assert "Carla" in best_answer.answer
    assert best_answer.score <= 1
    assert best_answer.score >= 0
    assert best_answer.meta["meta_field"] == "test1"
    assert best_answer.context == "My name is Carla and I live in Berlin"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user