# haystack/test/test_pipeline_extractive_qa.py


import pytest
from haystack.pipelines import TranslationWrapperPipeline, ExtractiveQAPipeline
from haystack.schema import Answer


@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers(reader, retriever_with_docs, document_store_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 3}})
    assert prediction is not None
    assert isinstance(prediction["answers"][0], Answer)
    assert prediction["query"] == "Who lives in Berlin?"
    assert prediction["answers"][0].answer == "Carla"
    assert 0 <= prediction["answers"][0].score <= 1
    assert prediction["answers"][0].meta["meta_field"] == "test1"
    assert prediction["answers"][0].context == "My name is Carla and I live in Berlin"
    assert len(prediction["answers"]) == 3
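

# A minimal sketch (not part of the original suite): with the default reader the
# scores above are normalized to [0, 1], so the returned answers should also come
# back in best-first order. The test name and assertion are assumptions.
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_sorted_by_score(reader, retriever_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Reader": {"top_k": 3}})
    scores = [answer.score for answer in prediction["answers"]]
    # answers are expected ordered by descending confidence
    assert scores == sorted(scores, reverse=True)

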
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_without_normalized_scores(reader_without_normalized_scores, retriever_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader_without_normalized_scores, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Reader": {"top_k": 3}})
    assert prediction is not None
    assert prediction["query"] == "Who lives in Berlin?"
    assert prediction["answers"][0].answer == "Carla"
    # without normalization the reader returns raw logit-based scores, which for
    # this fixture are expected to land in the 10-11 range rather than [0, 1]
    assert 10 <= prediction["answers"][0].score <= 11
    assert prediction["answers"][0].meta["meta_field"] == "test1"
    assert prediction["answers"][0].context == "My name is Carla and I live in Berlin"
    assert len(prediction["answers"]) == 3
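

# A minimal sketch contrasting with the test above (assumption, not in the
# original suite): with the default score-normalizing reader, every returned
# answer, not just the top one, should carry a score inside [0, 1].
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_all_normalized_scores_in_unit_interval(reader, retriever_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Reader": {"top_k": 3}})
    assert all(0 <= answer.score <= 1 for answer in prediction["answers"])

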
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_offsets(reader, retriever_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    prediction = pipeline.run(query="Who lives in Berlin?", params={"Retriever": {"top_k": 5}})
    start = prediction["answers"][0].offsets_in_context[0].start
    end = prediction["answers"][0].offsets_in_context[0].end
    assert start == 11
    assert end == 16
    assert prediction["answers"][0].context[start:end] == prediction["answers"][0].answer
@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_single_result(reader, retriever_with_docs):
    pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    query = "testing finder"
    prediction = pipeline.run(query=query, params={"Retriever": {"top_k": 1}, "Reader": {"top_k": 1}})
    assert prediction is not None
    assert len(prediction["answers"]) == 1


@pytest.mark.slow
@pytest.mark.parametrize("retriever_with_docs", ["tfidf"], indirect=True)
def test_extractive_qa_answers_with_translator(reader, retriever_with_docs, en_to_de_translator, de_to_en_translator):
    base_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever_with_docs)
    pipeline = TranslationWrapperPipeline(
        input_translator=de_to_en_translator, output_translator=en_to_de_translator, pipeline=base_pipeline
    )
    prediction = pipeline.run(query="Wer lebt in Berlin?", params={"Reader": {"top_k": 3}})
    assert prediction is not None
    assert prediction["query"] == "Wer lebt in Berlin?"
    assert "Carla" in prediction["answers"][0].answer
    assert 0 <= prediction["answers"][0].score <= 1
    assert prediction["answers"][0].meta["meta_field"] == "test1"
    assert prediction["answers"][0].context == "My name is Carla and I live in Berlin"