haystack/test/test_pipeline_extractive_qa.py
Sara Zan a59bca3661
Apply black formatting (#2115)
* Testing black on ui/

* Applying black on docstores

* Add latest docstring and tutorial changes

* Create a single GH action for Black and docs to reduce commit noise to the minimum, slightly refactor the OpenAPI action too

* Remove comments

* Relax constraints on pydoc-markdown

* Split temporary black from the docs. Pydoc-markdown was obsolete and needs a separate PR to upgrade

* Fix a couple of bugs

* Add a type: ignore that was missing somehow

* Give path to black

* Apply Black

* Apply Black

* Relocate a couple of type: ignore

* Update documentation

* Make Linux CI run after applying Black

* Triggering Black

* Apply Black

* Remove dependency, does not work well

* Remove manually double trailing commas

* Update documentation

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2022-02-03 13:43:18 +01:00

86 lines
3.8 KiB
Python

import pytest
from haystack.pipelines import TranslationWrapperPipeline, ExtractiveQAPipeline
from haystack.schema import Answer
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers(reader, retriever_with_docs, document_store_with_docs):
    """Check the answers an ``ExtractiveQAPipeline`` returns for a fixed query.

    The pipeline is assembled from the ``reader`` and ``retriever_with_docs``
    fixtures (the latter parametrized indirectly with ``"tfidf"``) and run on
    "Who lives in Berlin?" with ``top_k=10`` for the retriever and ``top_k=3``
    for the reader.

    ``document_store_with_docs`` is requested as a fixture but is not
    referenced in the body; it is kept in the signature so the set of
    fixtures pytest sets up for this test is unchanged.
    """
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(
        query="Who lives in Berlin?",
        params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}},
    )

    assert prediction is not None
    assert prediction["query"] == "Who lives in Berlin?"
    # Check the count before indexing so an empty result fails with a
    # readable assertion message instead of an IndexError.
    assert len(prediction["answers"]) == 3

    top_answer = prediction["answers"][0]
    # isinstance is the idiomatic type check and also accepts Answer subclasses.
    assert isinstance(top_answer, Answer)
    assert top_answer.answer == "Carla"
    assert 0 <= top_answer.score <= 1
    assert top_answer.meta["meta_field"] == "test1"
    assert top_answer.context == "My name is Carla and I live in Berlin"
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_without_normalized_scores(reader_without_normalized_scores, retriever_with_docs):
    """Check pipeline answers when the reader fixture does not normalize scores.

    Runs "Who lives in Berlin?" with reader ``top_k=3`` and expects the top
    answer's score to fall in the closed interval [10, 11].
    """
    qa_pipeline = ExtractiveQAPipeline(
        reader=reader_without_normalized_scores,
        retriever=retriever_with_docs,
    )
    reader_params = {"Reader": {"top_k": 3}}
    prediction = qa_pipeline.run(query="Who lives in Berlin?", params=reader_params)

    assert prediction is not None
    assert prediction["query"] == "Who lives in Berlin?"

    top_answer = prediction["answers"][0]
    assert top_answer.answer == "Carla"
    # Same bounds as two separate <= 11 / >= 10 assertions.
    assert 10 <= top_answer.score <= 11
    assert top_answer.meta["meta_field"] == "test1"
    assert top_answer.context == "My name is Carla and I live in Berlin"
    assert len(prediction["answers"]) == 3
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_offsets(reader, retriever_with_docs):
    """Check that the top answer's context offsets point at the answer text.

    Runs "Who lives in Berlin?" with retriever ``top_k=5`` and expects the
    first ``offsets_in_context`` span of the top answer to be (11, 16), and
    slicing the context with that span to reproduce the answer string.
    """
    qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = qa_pipeline.run(
        query="Who lives in Berlin?",
        params={"Retriever": {"top_k": 5}},
    )

    top_answer = prediction["answers"][0]
    span = top_answer.offsets_in_context[0]
    begin, stop = span.start, span.end

    assert begin == 11
    assert stop == 16
    assert top_answer.context[begin:stop] == top_answer.answer
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
    """Check that ``top_k=1`` on retriever and reader yields exactly one answer."""
    qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    single_result_params = {"Retriever": {"top_k": 1}, "Reader": {"top_k": 1}}

    prediction = qa_pipeline.run(query="testing finder", params=single_result_params)

    assert prediction is not None
    assert len(prediction["answers"]) == 1
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_with_translator(reader, retriever_with_docs, en_to_de_translator, de_to_en_translator):
    """Check an extractive QA pipeline wrapped in ``TranslationWrapperPipeline``.

    The wrapper receives ``de_to_en_translator`` as input translator and
    ``en_to_de_translator`` as output translator. The German query
    "Wer lebt in Berlin?" is run with reader ``top_k=3``; the top answer must
    contain "Carla" and the returned query must equal the original query.
    """
    qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    translated_pipeline = TranslationWrapperPipeline(
        pipeline=qa_pipeline,
        input_translator=de_to_en_translator,
        output_translator=en_to_de_translator,
    )

    german_query = "Wer lebt in Berlin?"
    prediction = translated_pipeline.run(query=german_query, params={"Reader": {"top_k": 3}})

    assert prediction is not None
    assert prediction["query"] == "Wer lebt in Berlin?"

    top_answer = prediction["answers"][0]
    # Substring check, not equality: only "Carla" being contained is required.
    assert "Carla" in top_answer.answer
    assert 0 <= top_answer.score <= 1
    assert top_answer.meta["meta_field"] == "test1"
    assert top_answer.context == "My name is Carla and I live in Berlin"