test: mock all Translator tests and move one to e2e (#4290)

* mock all translator tests and move one to e2e

* typo

* extract pipeline tests using translator

* remove duplicate test

* move generator test in e2e

* Update e2e/pipelines/test_extractive_qa.py

* pytest.mark.unit

* black

* remove model name as well

* remove unused fixture

* rename original and improve pipeline tests

* fixes

* pylint
This commit is contained in:
ZanSara 2023-03-01 14:52:05 +01:00 committed by GitHub
parent 7e0f9715ba
commit 165a0a5faa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 163 additions and 102 deletions

0
e2e/__init__.py Normal file
View File

View File

@ -1,6 +1,7 @@
import os
import uuid
from contextlib import contextmanager
from pathlib import Path
import pytest
@ -16,6 +17,9 @@ from haystack.document_stores import (
)
SAMPLES_PATH = Path(__file__).parent.parent / "test" / "samples"
@pytest.fixture
def docs_all_formats():
return [

View File

@ -6,7 +6,7 @@ import numpy as np
from haystack.schema import Document
from .conftest import document_store
from ..conftest import document_store
DOCUMENTS = [

View File

@ -3,7 +3,7 @@ import pytest
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import DocumentSearchPipeline
from .conftest import document_store
from ..conftest import document_store
@pytest.mark.parametrize("name", ["memory", "faiss", "milvus", "weaviate", "elasticsearch"])

View File

@ -4,7 +4,7 @@ import pandas as pd
from haystack.nodes import EmbeddingRetriever, TableTextRetriever
from .conftest import document_store
from ..conftest import document_store
@pytest.mark.parametrize("name", ["elasticsearch", "faiss", "memory", "milvus"])

0
e2e/nodes/__init__.py Normal file
View File

View File

@ -0,0 +1,13 @@
from haystack import Document
from haystack.nodes import TransformersTranslator
def test_translator():
    """E2E check of TransformersTranslator with a real Helsinki-NLP EN->DE model.

    Exercises all three input forms accepted by ``translate``: a bare query
    string, a list of raw strings, and a list of Document objects.
    """
    translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    source_text = "I live in Berlin"
    expected_translation = "Ich lebe in Berlin"
    assert translator.translate(query=source_text) == expected_translation
    assert translator.translate(documents=[source_text])[0] == expected_translation
    assert translator.translate(documents=[Document(content=source_text)])[0].content == expected_translation

View File

View File

@ -0,0 +1,29 @@
from haystack.nodes import TransformersTranslator, FARMReader, TfidfRetriever
from haystack.pipelines import ExtractiveQAPipeline, TranslationWrapperPipeline
from haystack.document_stores import InMemoryDocumentStore
def test_extractive_qa_answers_with_translator(docs):
    """E2E: ExtractiveQAPipeline wrapped with DE->EN input / EN->DE output translators.

    A German query is translated to English, answered over the English ``docs``
    fixture, and the top answer's text, score bounds, meta and context are checked.
    """
    output_translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
    input_translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en")
    document_store = InMemoryDocumentStore(use_bm25=False)
    retriever = TfidfRetriever(document_store=document_store)
    reader = FARMReader(
        model_name_or_path="deepset/bert-medium-squad2-distilled", use_gpu=False, top_k_per_sample=5, num_processes=0
    )
    document_store.write_documents(docs)

    wrapped_pipeline = TranslationWrapperPipeline(
        input_translator=input_translator,
        output_translator=output_translator,
        pipeline=ExtractiveQAPipeline(reader=reader, retriever=retriever),
    )
    prediction = wrapped_pipeline.run(query="Wer lebt in Berlin?", params={"Reader": {"top_k": 3}})

    assert prediction is not None
    assert prediction["query"] == "Wer lebt in Berlin?"
    top_answer = prediction["answers"][0]
    assert "Carla" in top_answer.answer
    assert 0 <= top_answer.score <= 1
    assert top_answer.meta["meta_field"] == "test1"
    assert top_answer.context == "My name is Carla and I live in Berlin"

View File

@ -0,0 +1,36 @@
from haystack import Document
from haystack.pipelines import TranslationWrapperPipeline, GenerativeQAPipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import DensePassageRetriever, RAGenerator, TransformersTranslator
def test_generative_pipeline_with_translator():
    """E2E: GenerativeQAPipeline (RAG) wrapped with DE->EN input / EN->DE output translators."""
    documents = [
        Document(content="The capital of Germany is the city state of Berlin."),
        Document(content="Berlin is the capital and largest city of Germany by both area and population."),
    ]
    document_store = InMemoryDocumentStore(use_bm25=True)
    document_store.write_documents(documents)
    # Needs DPR or RAGenerator will throw an exception.
    retriever = DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
        use_gpu=False,
        embed_title=True,
    )
    document_store.update_embeddings(retriever=retriever)
    generator = RAGenerator(
        model_name_or_path="facebook/rag-token-nq", generator_type="token", max_length=20, retriever=retriever
    )
    wrapped_pipeline = TranslationWrapperPipeline(
        input_translator=TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en"),
        output_translator=TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de"),
        pipeline=GenerativeQAPipeline(retriever=retriever, generator=generator),
    )
    output = wrapped_pipeline.run(
        query="Was ist die Hauptstadt der Bundesrepublik Deutschland?",
        params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}},
    )
    answers = output["answers"]
    assert len(answers) == 2
    assert "berlin" in answers[0].answer.lower()

View File

@ -54,7 +54,6 @@ from haystack.nodes import (
TableReader,
RCIReader,
TransformersSummarizer,
TransformersTranslator,
QuestionGenerator,
PromptTemplate,
)
@ -555,16 +554,6 @@ def summarizer():
return TransformersSummarizer(model_name_or_path="sshleifer/distilbart-xsum-12-6", use_gpu=False)
@pytest.fixture
def en_to_de_translator():
return TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-de")
@pytest.fixture
def de_to_en_translator():
return TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-de-en")
@pytest.fixture
def reader_without_normalized_scores():
return FARMReader(

View File

@ -1,26 +0,0 @@
import pytest
from haystack.pipelines import TranslationWrapperPipeline, ExtractiveQAPipeline
from .test_summarizer import SPLIT_DOCS
# Keeping few (retriever,document_store,reader) combination to reduce test time
@pytest.mark.integration
@pytest.mark.summarizer
@pytest.mark.parametrize("retriever,document_store,reader", [("embedding", "memory", "farm")], indirect=True)
def test_extractive_qa_pipeline_with_translator(
document_store, retriever, reader, en_to_de_translator, de_to_en_translator
):
document_store.write_documents(SPLIT_DOCS)
document_store.update_embeddings(retriever=retriever)
query = "Wo steht der Eiffelturm?"
base_pipeline = ExtractiveQAPipeline(retriever=retriever, reader=reader)
pipeline = TranslationWrapperPipeline(
input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=base_pipeline
)
output = pipeline.run(query=query, params={"Retriever": {"top_k": 2}})
assert len(output["documents"]) == 2
answers_texts = [el.answer for el in output["answers"]]
assert "Frankreich" in answers_texts

View File

@ -6,32 +6,12 @@ import pytest
from haystack.schema import Document
from haystack.nodes.answer_generator import Seq2SeqGenerator, OpenAIAnswerGenerator
from haystack.pipelines import TranslationWrapperPipeline, GenerativeQAPipeline
from haystack.pipelines import GenerativeQAPipeline
from haystack.nodes import PromptTemplate
import logging
# Keeping few (retriever,document_store) combination to reduce test time
@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Causes OOM on windows github runner")
@pytest.mark.integration
@pytest.mark.generator
@pytest.mark.parametrize("retriever,document_store", [("embedding", "memory")], indirect=True)
def test_generator_pipeline_with_translator(
document_store, retriever, rag_generator, en_to_de_translator, de_to_en_translator, docs_with_true_emb
):
document_store.write_documents(docs_with_true_emb)
query = "Was ist die Hauptstadt der Bundesrepublik Deutschland?"
base_pipeline = GenerativeQAPipeline(retriever=retriever, generator=rag_generator)
pipeline = TranslationWrapperPipeline(
input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=base_pipeline
)
output = pipeline.run(query=query, params={"Generator": {"top_k": 2}, "Retriever": {"top_k": 1}})
answers = output["answers"]
assert len(answers) == 2
assert "berlin" in answers[0].answer
@pytest.mark.integration
@pytest.mark.generator
def test_rag_token_generator(rag_generator, docs_with_true_emb):

View File

@ -1,60 +1,115 @@
from haystack.schema import Document
import pytest
EXPECTED_OUTPUT = "Ich lebe in Berlin"
INPUT = "I live in Berlin"
DOCUMENT_INPUT = Document(content=INPUT)
import haystack
from haystack.schema import Document
from haystack.nodes import TransformersTranslator
# Text fed into the translator node by the unit tests below.
ORIGINAL_TEXT = "TEST QUERY"
# Canned output the mocked tokenizer decodes to, whatever the input was.
TRANSLATION = "MOCK TRANSLATION"


class MockTokenizer:
    """Stand-in for transformers.AutoTokenizer: nothing is downloaded, decoding is canned."""

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Mirror the transformers factory entry point; every argument is ignored.
        return cls()

    def __call__(self, *args, **kwargs):
        # "Tokenizing" hands back the tokenizer itself so a chained `.to(device)` still works.
        return self

    def to(self, *args, **kwargs):
        # The "encoded batch on a device" is just an empty dict — presumably so the
        # node can `**`-expand it into the model call; verify against the node code.
        return {}

    def batch_decode(self, *args, **kwargs):
        # Decoding always yields the single canned translation.
        return [TRANSLATION]
class MockModel:
    """Stand-in for transformers.AutoModelForSeq2SeqLM: inert `generate` and `to`."""

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same factory entry point as the real model class; arguments are ignored.
        return cls()

    def generate(self, *args, **kwargs):
        # The generated ids are irrelevant: MockTokenizer.batch_decode ignores its input.
        return None

    def to(self, *args, **kwargs):
        # Device placement is a no-op for the mock.
        return None
@pytest.fixture
def mock_models(monkeypatch):
    """Swap the transformers classes loaded by the translator node for the local mocks."""
    transformers_ns = haystack.nodes.translator.transformers
    monkeypatch.setattr(transformers_ns, "AutoTokenizer", MockTokenizer)
    monkeypatch.setattr(transformers_ns, "AutoModelForSeq2SeqLM", MockModel)
@pytest.fixture
def en_to_de_translator(mock_models) -> TransformersTranslator:
    """Translator built on the mocked model/tokenizer; the model name is ignored by the mocks."""
    translator = TransformersTranslator(model_name_or_path="irrelevant/anyway")
    return translator
@pytest.fixture
def de_to_en_translator(mock_models) -> TransformersTranslator:
    """Second mocked translator; identical to en_to_de_translator since the mocks ignore the name."""
    translator = TransformersTranslator(model_name_or_path="irrelevant/anyway")
    return translator
@pytest.mark.unit
def test_translator_with_query(en_to_de_translator):
    """translate() on a bare query string returns the (mocked) translation."""
    # Stale pre-rename assertion referencing removed INPUT/EXPECTED_OUTPUT dropped.
    assert en_to_de_translator.translate(query=ORIGINAL_TEXT) == TRANSLATION
@pytest.mark.unit
def test_translator_with_list(en_to_de_translator):
    """Translating a list of raw strings returns the translated strings."""
    # Stale pre-rename assertion referencing removed INPUT/EXPECTED_OUTPUT dropped.
    assert en_to_de_translator.translate(documents=[ORIGINAL_TEXT])[0] == TRANSLATION
@pytest.mark.unit
def test_translator_with_document(en_to_de_translator):
    """Translating Document objects yields Documents carrying the translated content."""
    # Stale pre-rename assertion referencing removed INPUT/EXPECTED_OUTPUT dropped.
    assert en_to_de_translator.translate(documents=[Document(content=ORIGINAL_TEXT)])[0].content == TRANSLATION
@pytest.mark.unit
def test_translator_with_document_preserves_original(en_to_de_translator):
    """The caller's Document must not be mutated by translation."""
    # The older "_preserves_input" variant (using removed INPUT) was superseded by this rename.
    original_document = Document(content=ORIGINAL_TEXT)
    en_to_de_translator.translate(documents=[original_document])
    assert original_document.content == ORIGINAL_TEXT
@pytest.mark.unit
def test_translator_with_dictionary(en_to_de_translator):
    """Dict documents with a "content" key are translated in their "content" field."""
    # Stale pre-rename assertion referencing removed INPUT/EXPECTED_OUTPUT dropped.
    assert en_to_de_translator.translate(documents=[{"content": ORIGINAL_TEXT}])[0]["content"] == TRANSLATION
@pytest.mark.unit
def test_translator_with_dictionary_preserves_original(en_to_de_translator):
    """The caller's dict document must not be mutated by translation."""
    # The older "_preserves_input" variant (using removed INPUT) was superseded by this rename.
    original_document = {"content": ORIGINAL_TEXT}
    en_to_de_translator.translate(documents=[original_document])
    assert original_document["content"] == ORIGINAL_TEXT
@pytest.mark.unit
def test_translator_with_dictionary_with_dict_key(en_to_de_translator):
    """An explicit dict_key selects which field of the dict document is translated."""
    # Stale pre-rename assertion referencing removed INPUT/EXPECTED_OUTPUT dropped.
    assert en_to_de_translator.translate(documents=[{"key": ORIGINAL_TEXT}], dict_key="key")[0]["key"] == TRANSLATION
@pytest.mark.unit
def test_translator_with_empty_original(en_to_de_translator):
    """Calling translate() with neither query nor documents raises AttributeError."""
    # The old "test_translator_with_empty_input" def line was this test's pre-rename residue.
    with pytest.raises(AttributeError):
        en_to_de_translator.translate()
@pytest.mark.unit
def test_translator_with_query_and_documents(en_to_de_translator):
    """Supplying both query and documents at once is rejected with AttributeError."""
    # Stale pre-rename call line referencing removed INPUT dropped.
    with pytest.raises(AttributeError):
        en_to_de_translator.translate(query=ORIGINAL_TEXT, documents=[ORIGINAL_TEXT])
@pytest.mark.unit
def test_translator_with_dict_without_text_key(en_to_de_translator):
    """A dict document lacking the expected text key raises AttributeError."""
    # Stale pre-rename call line referencing removed INPUT dropped.
    with pytest.raises(AttributeError):
        en_to_de_translator.translate(documents=[{"text1": ORIGINAL_TEXT}])
@pytest.mark.unit
def test_translator_with_dict_with_non_string_value(en_to_de_translator):
    # A dict value that is not a string cannot be translated -> the node raises AttributeError.
    with pytest.raises(AttributeError):
        en_to_de_translator.translate(documents=[{"text": 123}])

View File

@ -60,22 +60,3 @@ def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
prediction = pipeline.run(query=query, params={"Retriever": {"top_k": 1}, "Reader": {"top_k": 1}})
assert prediction is not None
assert len(prediction["answers"]) == 1
@pytest.mark.integration
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_extractive_qa_answers_with_translator(reader, retriever_with_docs, en_to_de_translator, de_to_en_translator):
base_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
pipeline = TranslationWrapperPipeline(
input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=base_pipeline
)
prediction = pipeline.run(query="Wer lebt in Berlin?", params={"Reader": {"top_k": 3}})
assert prediction is not None
assert prediction["query"] == "Wer lebt in Berlin?"
assert "Carla" in prediction["answers"][0].answer
assert prediction["answers"][0].score <= 1
assert prediction["answers"][0].score >= 0
assert prediction["answers"][0].meta["meta_field"] == "test1"
assert prediction["answers"][0].context == "My name is Carla and I live in Berlin"