mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-23 08:52:16 +00:00

* first draft / notes on new primitives * wip label / feedback refactor * rename doc.text -> doc.content. add doc.content_type * add datatype for content * remove faq_question_field from ES and weaviate. rename text_field -> content_field in docstores. update tutorials for content field * update converters for . Add warning for empty * renam label.question -> label.query. Allow sorting of Answers. * WIP primitives * update ui/reader for new Answer format * Improve Label. First refactoring of MultiLabel. Adjust eval code * fixed workflow conflict with introducing new one (#1472) * Add latest docstring and tutorial changes * make add_eval_data() work again * fix reader formats. WIP fix _extract_docs_and_labels_from_dict * fix test reader * Add latest docstring and tutorial changes * fix another test case for reader * fix mypy in farm reader.eval() * fix mypy in farm reader.eval() * WIP ORM refactor * Add latest docstring and tutorial changes * fix mypy weaviate * make label and multilabel dataclasses * bump mypy env in CI to python 3.8 * WIP refactor Label ORM * WIP refactor Label ORM * simplify tests for individual doc stores * WIP refactoring markers of tests * test alternative approach for tests with existing parametrization * WIP refactor ORMs * fix skip logic of already parametrized tests * fix weaviate behaviour in tests - not parametrizing it in our general test cases. * Add latest docstring and tutorial changes * fix some tests * remove sql from document_store_types * fix markers for generator and pipeline test * remove inmemory marker * remove unneeded elasticsearch markers * add dataclasses-json dependency. adjust ORM to just store JSON repr * ignore type as dataclasses_json seems to miss functionality here * update readme and contributing.md * update contributing * adjust example * fix duplicate doc handling for custom index * Add latest docstring and tutorial changes * fix some ORM issues. fix get_all_labels_aggregated. 
* update drop flags where get_all_labels_aggregated() was used before * Add latest docstring and tutorial changes * add to_json(). add + fix tests * fix no_answer handling in label / multilabel * fix duplicate docs in memory doc store. change primary key for sql doc table * fix mypy issues * fix mypy issues * haystack/retriever/base.py * fix test_write_document_meta[elastic] * fix test_elasticsearch_custom_fields * fix test_labels[elastic] * fix crawler * fix converter * fix docx converter * fix preprocessor * fix test_utils * fix tfidf retriever. fix selection of docstore in tests with multiple fixtures / parameterizations * Add latest docstring and tutorial changes * fix crawler test. fix ocrconverter attribute * fix test_elasticsearch_custom_query * fix generator pipeline * fix ocr converter * fix ragenerator * Add latest docstring and tutorial changes * fix test_load_and_save_yaml for elasticsearch * fixes for pipeline tests * fix faq pipeline * fix pipeline tests * Add latest docstring and tutorial changes * fix weaviate * Add latest docstring and tutorial changes * trigger CI * satisfy mypy * Add latest docstring and tutorial changes * satisfy mypy * Add latest docstring and tutorial changes * trigger CI * fix question generation test * fix ray. fix Q-generation * fix translator test * satisfy mypy * wip refactor feedback rest api * fix rest api feedback endpoint * fix doc classifier * remove relation of Labels -> Docs in SQL ORM * fix faiss/milvus tests * fix doc classifier test * fix eval test * fixing eval issues * Add latest docstring and tutorial changes * fix mypy * WIP replace dataclasses-json with manual serialization * Add latest docstring and tutorial changes * revert to dataclass-json serialization for now. remove debug prints. * update docstrings * fix extractor. 
fix Answer Span init * fix api test * keep metadata of answers in reader.run() * fix meta handling * address review feedback * Add latest docstring and tutorial changes * make document=None for open domain labels * add import * fix print utils * fix rest api * address review feedback * Add latest docstring and tutorial changes * fix mypy Co-authored-by: Markus Paff <markuspaff.mp@gmail.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
183 lines
7.3 KiB
Python
183 lines
7.3 KiB
Python
import pytest
|
|
from haystack.document_store.base import BaseDocumentStore
|
|
from haystack.preprocessor.preprocessor import PreProcessor
|
|
from haystack.eval import EvalAnswers, EvalDocuments
|
|
from haystack import Pipeline
|
|
|
|
@pytest.mark.parametrize("batch_size", [None, 20])
def test_add_eval_data(document_store, batch_size):
    """Check that SQuAD-format eval data is stored as documents and labels.

    Loads ``samples/squad/small.json`` into dedicated test indices, once
    without and once with an explicit ``batch_size``, then verifies the
    stored counts, the metadata of one filtered document and the fields of
    one known label, including the consistency of its answer offsets with
    the referenced document's content.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"
    target_query = "In what country is Normandy located?"

    # add eval data (SQUAD format)
    document_store.add_eval_data(
        filename="samples/squad/small.json",
        doc_index=doc_index,
        label_index=label_index,
        batch_size=batch_size,
    )

    assert document_store.get_document_count(index=doc_index) == 87
    assert document_store.get_label_count(index=label_index) == 1214

    # test documents
    docs = document_store.get_all_documents(index=doc_index, filters={"name": ["Normans"]})
    assert docs, "Expected at least one document with meta name 'Normans'"
    assert docs[0].meta["name"] == "Normans"
    assert len(docs[0].meta.keys()) == 1

    # test labels
    labels = document_store.get_all_labels(index=label_index)
    label = next((candidate for candidate in labels if candidate.query == target_query), None)
    # Fail with a readable message instead of an AttributeError on None
    # when the expected label was not written to the store.
    assert label is not None, "No label found for query: " + target_query

    assert label.answer.answer == "France"
    assert label.no_answer is False
    assert label.is_correct_answer is True
    assert label.is_correct_document is True
    assert label.query == target_query
    assert label.origin == "gold-label"
    assert label.answer.offsets_in_document[0].start == 159

    context_span = label.answer.offsets_in_context[0]
    assert label.answer.context[context_span.start:context_span.end] == "France"
    assert label.answer.document_id == label.document.id

    # check combination: the document-level offsets must point at the answer text
    doc = document_store.get_document_by_id(label.document.id, index=doc_index)
    start = label.answer.offsets_in_document[0].start
    end = label.answer.offsets_in_document[0].end
    assert end == start + len(label.answer.answer)
    assert doc.content[start:end] == "France"
|
|
|
|
|
|
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
def test_eval_reader(reader, document_store: BaseDocumentStore):
    """Run ``reader.eval()`` on the tiny SQuAD sample and check its metrics."""
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # Load the SQuAD-format evaluation set into the test indices.
    document_store.add_eval_data(
        filename="samples/squad/tiny.json", doc_index=doc_index, label_index=label_index
    )
    stored_docs = document_store.get_document_count(index=doc_index)
    assert stored_docs == 2

    # Evaluate the reader against the stored labels on CPU.
    metrics = reader.eval(
        document_store=document_store, label_index=label_index, doc_index=doc_index, device="cpu"
    )

    # F1 is expected to sit in a narrow window around 66.66.
    f1_score = metrics["f1"]
    assert 66.65 < f1_score < 66.67
    assert metrics["EM"] == 50
    assert metrics["top_n_accuracy"] == 100.0
|
|
|
|
|
|
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("open_domain", [True, False])
@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True)
def test_eval_elastic_retriever(document_store: BaseDocumentStore, open_domain, retriever):
    """Run ``retriever.eval()`` with ``top_k=1`` on the tiny SQuAD sample.

    Recall and MRR are checked for both ``open_domain`` settings; MAP is
    only checked when ``open_domain`` is False.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # Load the SQuAD-format evaluation set into the test indices.
    document_store.add_eval_data(
        filename="samples/squad/tiny.json", doc_index=doc_index, label_index=label_index
    )
    stored_docs = document_store.get_document_count(index=doc_index)
    assert stored_docs == 2

    # Evaluate the retriever against the stored labels.
    metrics = retriever.eval(
        top_k=1,
        label_index=label_index,
        doc_index=doc_index,
        open_domain=open_domain,
    )
    assert metrics["recall"] == 1.0
    assert metrics["mrr"] == 1.0

    if open_domain:
        return
    assert metrics["map"] == 1.0
|
|
|
|
|
|
# TODO simplify with a mock retriever and make it independent of elasticsearch documentstore
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
@pytest.mark.parametrize("reader", ["farm"], indirect=True)
@pytest.mark.parametrize("retriever", ["elasticsearch"], indirect=True)
def test_eval_pipeline(document_store: BaseDocumentStore, reader, retriever):
    """Run a retriever -> reader pipeline with evaluation nodes attached.

    An ``EvalDocuments`` node follows the retriever and three ``EvalAnswers``
    nodes (two configured with a ``sas_model``, one with defaults) follow the
    reader. Every aggregated label is pushed through the pipeline, after
    which the metrics accumulated on the evaluation nodes are checked.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # Load the SQuAD-format evaluation set into the test indices.
    document_store.add_eval_data(
        filename="samples/squad/tiny.json", doc_index=doc_index, label_index=label_index
    )

    aggregated_labels = document_store.get_all_labels_aggregated(
        index=label_index, drop_negative_labels=True, drop_no_answers=False
    )

    eval_retriever = EvalDocuments()
    eval_reader = EvalAnswers(sas_model="sentence-transformers/paraphrase-MiniLM-L3-v2", debug=True)
    eval_reader_cross = EvalAnswers(sas_model="cross-encoder/stsb-TinyBERT-L-4", debug=True)
    eval_reader_vanilla = EvalAnswers()

    assert document_store.get_document_count(index=doc_index) == 2

    # (component, node name, input node names), added in this order.
    node_specs = [
        (retriever, "ESRetriever", ["Query"]),
        (eval_retriever, "EvalDocuments", ["ESRetriever"]),
        (reader, "QAReader", ["EvalDocuments"]),
        (eval_reader, "EvalAnswers", ["QAReader"]),
        (eval_reader_cross, "EvalAnswers_cross", ["QAReader"]),
        (eval_reader_vanilla, "EvalAnswers_vanilla", ["QAReader"]),
    ]
    pipeline = Pipeline()
    for component, node_name, node_inputs in node_specs:
        pipeline.add_node(component=component, name=node_name, inputs=node_inputs)

    # The run output itself is not inspected; the evaluation nodes collect the metrics.
    for multi_label in aggregated_labels:
        pipeline.run(
            query=multi_label.query,
            labels=multi_label,
            params={"index": "haystack_test_eval_document"},
        )

    assert eval_retriever.recall == 1.0
    assert round(eval_reader.top_k_f1, 4) == 0.8333
    assert eval_reader.top_k_em == 0.5
    assert round(eval_reader.top_k_sas, 3) == 0.800
    assert round(eval_reader_cross.top_k_sas, 3) == 0.671
    assert eval_reader.top_k_em == eval_reader_vanilla.top_k_em
|
|
|
|
|
|
def test_eval_data_split_word(document_store):
    """Add eval data through a word-splitting preprocessor (4 words per split).

    Checks the number of stored documents and the number of distinct
    document ids referenced by the first aggregated label.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # splitting by word; all cleaning steps are switched off
    word_splitter = PreProcessor(
        clean_empty_lines=False,
        clean_whitespace=False,
        clean_header_footer=False,
        split_by="word",
        split_length=4,
        split_overlap=0,
        split_respect_sentence_boundary=False,
    )

    document_store.add_eval_data(
        filename="samples/squad/tiny.json",
        doc_index=doc_index,
        label_index=label_index,
        preprocessor=word_splitter,
    )

    aggregated_labels = document_store.get_all_labels_aggregated(index=label_index)
    stored_docs = document_store.get_all_documents(index=doc_index)
    assert len(stored_docs) == 5

    distinct_doc_ids = set(aggregated_labels[0].document_ids)
    assert len(distinct_doc_ids) == 2
|
|
|
|
|
|
def test_eval_data_split_passage(document_store):
    """Add eval data through a passage-splitting preprocessor (1 passage per split).

    Checks the number of stored documents and the content length of the
    second one.
    """
    doc_index = "haystack_test_eval_document"
    label_index = "haystack_test_feedback"

    # splitting by passage; all cleaning steps are switched off
    passage_splitter = PreProcessor(
        clean_empty_lines=False,
        clean_whitespace=False,
        clean_header_footer=False,
        split_by="passage",
        split_length=1,
        split_overlap=0,
        split_respect_sentence_boundary=False,
    )

    document_store.add_eval_data(
        filename="samples/squad/tiny_passages.json",
        doc_index=doc_index,
        label_index=label_index,
        preprocessor=passage_splitter,
    )

    stored_docs = document_store.get_all_documents(index=doc_index)
    assert len(stored_docs) == 2

    second_doc_length = len(stored_docs[1].content)
    assert second_doc_length == 56