From 2bc7fe1a08d6325f1d72344d88a9c405ccb9a153 Mon Sep 17 00:00:00 2001
From: Sebastian Husch Lee
Date: Mon, 24 Jul 2023 17:07:45 +0200
Subject: [PATCH] test: reactivate unit tests in `test_eval.py` (#5255)

* Activate tests that follow unit test and integration test rules

* Add more integration labels

* Change name to better reflect complexity of test

* Remove integration marks, move test to the doc store tests for add_eval_data

* Remove incorrect integration label

* Deactivate document store test because it fails for Weaviate and Pinecone

* Remove unit label since test needs to be refactored to be considered a unit test

* Undo changes

* Undo change

* Check every field in the loaded evaluation result

* Add back label and add skip reason

* Use pytest skip instead of TODO
---
 haystack/testing/document_store.py | 14 +++++++++
 test/pipelines/test_eval.py        | 47 ++++++++++++++++++++++++++----
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/haystack/testing/document_store.py b/haystack/testing/document_store.py
index 6e12da675..cba0d5950 100644
--- a/haystack/testing/document_store.py
+++ b/haystack/testing/document_store.py
@@ -538,6 +538,20 @@ class DocumentStoreBaseTestAbstract:
         # Some document stores normalize the embedding on save, let's just compare the length
         assert doc_to_write["custom_embedding_field"].shape == documents[0].embedding.shape
 
+    @pytest.mark.skip(reason="This currently fails for Weaviate and Pinecone")
+    @pytest.mark.integration
+    @pytest.mark.parametrize("batch_size", [None, 20])
+    def test_add_eval_data(self, ds, batch_size, samples_path):
+        # add eval data (SQUAD format)
+        ds.add_eval_data(
+            filename=samples_path / "squad" / "small.json",
+            doc_index=ds.index,
+            label_index=ds.label_index,
+            batch_size=batch_size,
+        )
+        assert ds.get_document_count() == 87
+        assert ds.get_label_count() == 1214
+
     #
     # Unit tests
     #
diff --git a/test/pipelines/test_eval.py b/test/pipelines/test_eval.py
index 276f116c7..a507e28e5 100644
--- a/test/pipelines/test_eval.py
+++ b/test/pipelines/test_eval.py
@@ -7,7 +7,6 @@ import pandas as pd
 from copy import deepcopy
 import responses
 
-from haystack.document_stores.memory import InMemoryDocumentStore
 from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
 from haystack.nodes.answer_generator.openai import OpenAIAnswerGenerator
 from haystack.nodes.preprocessor import PreProcessor
@@ -420,6 +419,7 @@ EVAL_TABLE_LABELS = [
 ]
 
 
+@pytest.mark.skip(reason="Should be an end-to-end test since it uses model inferencing")
 @pytest.mark.integration
 @pytest.mark.parametrize("document_store", ["memory"], indirect=True)
 @pytest.mark.parametrize("retriever", ["table_text_retriever"], indirect=True)
@@ -2038,6 +2038,7 @@ def test_empty_documents_dont_fail_pipeline(reader, retriever_with_docs, eval_la
     )
 
 
+@pytest.mark.unit
 def test_load_legacy_evaluation_result(tmp_path):
     legacy_csv = Path(tmp_path) / "legacy.csv"
     with open(legacy_csv, "w") as legacy_csv:
@@ -2069,7 +2070,8 @@ def test_load_legacy_evaluation_result(tmp_path):
     assert "content" not in eval_result["legacy"]
 
 
-def test_load_evaluation_result_w_none_values(tmp_path):
+@pytest.mark.unit
+def test_load_evaluation_result(tmp_path):
     eval_result_csv = Path(tmp_path) / "Reader.csv"
     with open(eval_result_csv, "w") as eval_result_csv:
         columns = [
@@ -2141,8 +2143,43 @@ def test_load_evaluation_result_w_none_values(tmp_path):
         )
 
     eval_result = EvaluationResult.load(tmp_path)
+    known_result = {
+        "multilabel_id": {0: "ddc1562602f2d6d895b91e53f83e4c16"},
+        "query": {0: "who is written in the book of life"},
+        "filters": {0: b"null"},
+        "gold_answers": {
+            0: [
+                "every person who is destined for Heaven or the World to Come",
+                "all people considered righteous before God",
+            ]
+        },
+        "answer": {0: None},
+        "context": {0: None},
+        "exact_match": {0: 0.0},
+        "f1": {0: 0.0},
+        "exact_match_context_scope": {0: 0.0},
+        "f1_context_scope": {0: 0.0},
+        "exact_match_document_id_scope": {0: 0.0},
+        "f1_document_id_scope": {0: 0.0},
+        "exact_match_document_id_and_context_scope": {0: 0.0},
+        "f1_document_id_and_context_scope": {0: 0.0},
+        "gold_contexts": {0: ["Book of Life - wikipedia Book of Life Jump to: navigation, search..."]},
+        "rank": {0: 1.0},
+        "document_ids": {0: None},
+        "gold_document_ids": {0: ["de2fd2f109e11213af1ea189fd1488a3-0", "de2fd2f109e11213af1ea189fd1488a3-0"]},
+        "offsets_in_document": {0: [{"start": 0, "end": 0}]},
+        "gold_offsets_in_documents": {0: [{"start": 374, "end": 434}, {"start": 1107, "end": 1149}]},
+        "offsets_in_context": {0: [{"start": 0, "end": 0}]},
+        "gold_offsets_in_contexts": {0: [{"start": 374, "end": 434}, {"start": 1107, "end": 1149}]},
+        "gold_answers_exact_match": {0: [0, 0]},
+        "gold_answers_f1": {0: [0, 0]},
+        "gold_documents_id_match": {0: [0.0, 0.0]},
+        "gold_contexts_similarity": {0: [0.0, 0.0]},
+        "type": {0: "answer"},
+        "node": {0: "Reader"},
+        "eval_mode": {0: "integrated"},
+        "index": {0: None},
+    }
     assert "Reader" in eval_result
     assert len(eval_result) == 1
-    assert eval_result["Reader"].iloc[0].answer is None
-    assert eval_result["Reader"].iloc[0].context is None
-    assert eval_result["Reader"].iloc[0].document_ids is None
+    assert eval_result["Reader"].to_dict() == known_result
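
A note on the shape of known_result in the last hunk: pandas' DataFrame.to_dict() defaults to the "dict" orientation, {column: {row_label: value}}, which is why every expected value in the patch is wrapped in a {0: ...} mapping. A minimal standalone sketch of that comparison style, with illustrative column names rather than the full evaluation schema:

    import pandas as pd

    # One-row frame resembling a loaded evaluation result.
    df = pd.DataFrame({"answer": [None], "rank": [1.0], "node": ["Reader"]})

    # to_dict() returns {column: {row_index: value}}, so checking every
    # field of the frame collapses into a single dict equality assertion.
    expected = {"answer": {0: None}, "rank": {0: 1.0}, "node": {0: "Reader"}}
    assert df.to_dict() == expected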
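
The unit and integration markers this patch adds and reshuffles are what test selection keys on. A minimal sketch of running only the reactivated unit tests programmatically, assuming the unit marker is registered in the project's pytest configuration; the path is the module the patch touches:

    import pytest

    # pytest.main accepts the same arguments as the pytest CLI;
    # "-m unit" keeps only tests decorated with @pytest.mark.unit.
    exit_code = pytest.main(["-m", "unit", "test/pipelines/test_eval.py"])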