2020-07-31 11:34:06 +02:00
|
|
|
import pytest
|
2020-09-16 18:33:23 +02:00
|
|
|
from haystack.document_store.base import BaseDocumentStore
|
2021-01-20 14:40:10 +01:00
|
|
|
from haystack.preprocessor.preprocessor import PreProcessor
|
2021-05-31 15:31:36 +02:00
|
|
|
from haystack.eval import EvalAnswers, EvalDocuments
|
2021-04-01 17:35:18 +02:00
|
|
|
from haystack import Pipeline
|
|
|
|
|
2021-01-21 16:00:08 +01:00
|
|
|
@pytest.mark.parametrize("batch_size", [None, 20])
def test_add_eval_data(document_store, batch_size):
    """Check that ``add_eval_data`` stores the expected documents and labels.

    Loads ``samples/squad/small.json`` (SQuAD format) into dedicated test
    indices, once per parametrized ``batch_size``, then verifies the document
    and label counts, one document's metadata, one label's fields, and that
    the label's start offset points at its answer inside the referenced
    document.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"
    question = "In what country is Normandy located?"

    # add eval data (SQUAD format)
    document_store.add_eval_data(
        filename="samples/squad/small.json",
        doc_index=doc_index,
        label_index=label_index,
        batch_size=batch_size,
    )

    assert document_store.get_document_count(index=doc_index) == 87
    assert document_store.get_label_count(index=label_index) == 1214

    # test documents
    docs = document_store.get_all_documents(index=doc_index, filters={"name": ["Normans"]})
    assert docs[0].meta["name"] == "Normans"
    assert len(docs[0].meta.keys()) == 1

    # test labels
    labels = document_store.get_all_labels(index=label_index)
    # Pick the first label for the reference question; fail with a clear
    # message (rather than an AttributeError on None) if it is missing.
    label = next((candidate for candidate in labels if candidate.question == question), None)
    assert label is not None, "expected a label for the Normandy question"

    assert label.answer == "France"
    # Value comparison (==) is kept from the original assertions; the stored
    # flags may not be the bool singletons in every store -- TODO confirm.
    assert label.no_answer == False
    assert label.is_correct_answer == True
    assert label.is_correct_document == True
    assert label.question == question
    assert label.origin == "gold_label"
    assert label.offset_start_in_doc == 159

    # check combination: the label offset must locate the answer in its document
    doc = document_store.get_document_by_id(label.document_id, index=doc_index)
    start = label.offset_start_in_doc
    end = start + len(label.answer)
    assert doc.text[start:end] == "France"
|
2020-07-31 11:34:06 +02:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_eval_reader(reader, document_store: BaseDocumentStore):
    """Evaluate the reader on the tiny SQuAD sample and check its metrics.

    The sample is loaded into the test document/label indices, the reader is
    evaluated against them on CPU, and the returned ``f1``, ``EM`` and
    ``top_n_accuracy`` values are compared with the expected numbers.
    """
    # Both the store and the reader are pointed at the same pair of indices.
    index_kwargs = {
        "doc_index": "haystack_test_eval_document",
        "label_index": "haystack_test_feedback",
    }

    # add eval data (SQUAD format)
    document_store.add_eval_data(filename="samples/squad/tiny.json", **index_kwargs)
    assert document_store.get_document_count(index=index_kwargs["doc_index"]) == 2

    # eval reader
    metrics = reader.eval(document_store=document_store, device="cpu", **index_kwargs)

    # f1 is checked against an open interval instead of an exact float value
    assert 66.65 < metrics["f1"] < 66.67
    assert metrics["EM"] == 50
    assert metrics["top_n_accuracy"] == 100.0
|
2020-07-31 11:34:06 +02:00
|
|
|
|
|
|
|
|
2020-10-30 18:06:02 +01:00
|
|
|
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("open_domain", [True, False])
@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True)
def test_eval_elastic_retriever(document_store: BaseDocumentStore, open_domain, retriever):
    """Evaluate the retriever on the tiny SQuAD sample in both domain modes.

    Runs once with ``open_domain=True`` and once with ``open_domain=False``.
    ``recall`` and ``mrr`` are checked in both runs; ``map`` is only checked
    when ``open_domain`` is false.
    """
    eval_doc_index = "haystack_test_eval_document"
    eval_label_index = "haystack_test_feedback"

    # add eval data (SQUAD format)
    document_store.add_eval_data(
        filename="samples/squad/tiny.json",
        doc_index=eval_doc_index,
        label_index=eval_label_index,
    )
    assert document_store.get_document_count(index=eval_doc_index) == 2

    # eval retriever
    metrics = retriever.eval(
        top_k=1,
        label_index=eval_label_index,
        doc_index=eval_doc_index,
        open_domain=open_domain,
    )

    assert metrics["recall"] == 1.0
    assert metrics["mrr"] == 1.0
    if not open_domain:
        assert metrics["map"] == 1.0
|
2020-07-31 11:34:06 +02:00
|
|
|
|
2020-08-10 19:30:31 +02:00
|
|
|
|
2021-09-27 10:52:07 +02:00
|
|
|
# TODO simplify with a mock retriever and make it independent of elasticsearch documentstore
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True)
def test_eval_pipeline(document_store: BaseDocumentStore, reader, retriever):
    """Run a retriever/reader pipeline with evaluation nodes and check metrics.

    The tiny SQuAD sample is loaded into the test indices, every aggregated
    label is sent through a pipeline containing an ``EvalDocuments`` node and
    three ``EvalAnswers`` nodes (two with a ``sas_model``, one with defaults),
    and the metrics accumulated on those nodes are asserted afterwards.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # add eval data (SQUAD format)
    document_store.add_eval_data(
        filename="samples/squad/tiny.json",
        doc_index=doc_index,
        label_index=label_index,
    )

    labels = document_store.get_all_labels_aggregated(index=label_index)

    eval_retriever = EvalDocuments()
    eval_reader = EvalAnswers(sas_model="sentence-transformers/paraphrase-MiniLM-L3-v2", debug=True)
    eval_reader_cross = EvalAnswers(sas_model="cross-encoder/stsb-TinyBERT-L-4", debug=True)
    eval_reader_vanilla = EvalAnswers()

    assert document_store.get_document_count(index=doc_index) == 2

    # All three answer evaluators hang off the same reader node.
    p = Pipeline()
    p.add_node(component=retriever, name="ESRetriever", inputs=["Query"])
    p.add_node(component=eval_retriever, name="EvalDocuments", inputs=["ESRetriever"])
    p.add_node(component=reader, name="QAReader", inputs=["EvalDocuments"])
    p.add_node(component=eval_reader, name="EvalAnswers", inputs=["QAReader"])
    p.add_node(component=eval_reader_cross, name="EvalAnswers_cross", inputs=["QAReader"])
    p.add_node(component=eval_reader_vanilla, name="EvalAnswers_vanilla", inputs=["QAReader"])

    # The run results are not inspected here; the assertions below read the
    # metrics from the eval node objects instead.
    for label in labels:
        p.run(
            query=label.question,
            labels=label,
            params={"index": doc_index},
        )

    assert eval_retriever.recall == 1.0
    assert round(eval_reader.top_k_f1, 4) == 0.8333
    assert eval_reader.top_k_em == 0.5
    assert round(eval_reader.top_k_sas, 3) == 0.800
    assert round(eval_reader_cross.top_k_sas, 3) == 0.671
    assert eval_reader.top_k_em == eval_reader_vanilla.top_k_em
|
2021-04-01 17:35:18 +02:00
|
|
|
|
2021-09-27 10:52:07 +02:00
|
|
|
|
2021-01-21 16:00:08 +01:00
|
|
|
def test_eval_data_split_word(document_store):
    """Load the tiny SQuAD sample while splitting documents by word.

    With 4-word splits, no overlap and all cleaning switched off, the store
    is expected to hold 5 documents, and the first aggregated label is
    expected to reference 2 distinct document ids.
    """
    # splitting by word
    word_splitter_config = {
        "clean_empty_lines": False,
        "clean_whitespace": False,
        "clean_header_footer": False,
        "split_by": "word",
        "split_length": 4,
        "split_overlap": 0,
        "split_respect_sentence_boundary": False,
    }
    word_splitter = PreProcessor(**word_splitter_config)

    eval_doc_index = "haystack_test_eval_document"
    eval_label_index = "haystack_test_feedback"
    document_store.add_eval_data(
        filename="samples/squad/tiny.json",
        doc_index=eval_doc_index,
        label_index=eval_label_index,
        preprocessor=word_splitter,
    )

    aggregated_labels = document_store.get_all_labels_aggregated(index=eval_label_index)
    stored_docs = document_store.get_all_documents(index=eval_doc_index)

    assert len(stored_docs) == 5
    distinct_doc_ids = set(aggregated_labels[0].multiple_document_ids)
    assert len(distinct_doc_ids) == 2
|
|
|
|
|
|
|
|
|
2021-01-21 16:00:08 +01:00
|
|
|
def test_eval_data_split_passage(document_store):
    """Load the passage SQuAD sample while splitting documents by passage.

    With one passage per split, no overlap and all cleaning switched off, the
    store is expected to hold 2 documents, and the text of the document at
    position 1 is expected to be 56 characters long.
    """
    # splitting by passage
    passage_splitter_config = {
        "clean_empty_lines": False,
        "clean_whitespace": False,
        "clean_header_footer": False,
        "split_by": "passage",
        "split_length": 1,
        "split_overlap": 0,
        "split_respect_sentence_boundary": False,
    }
    passage_splitter = PreProcessor(**passage_splitter_config)

    eval_doc_index = "haystack_test_eval_document"
    document_store.add_eval_data(
        filename="samples/squad/tiny_passages.json",
        doc_index=eval_doc_index,
        label_index="haystack_test_feedback",
        preprocessor=passage_splitter,
    )

    stored_docs = document_store.get_all_documents(index=eval_doc_index)
    assert len(stored_docs) == 2

    second_doc_text = stored_docs[1].text
    assert len(second_doc_text) == 56
|