haystack/test/test_embedding_retriever.py
Lalit Pagaria 9f7f95221f
Milvus integration (#771)
* Initial commit for Milvus integration

* Add latest docstring and tutorial changes

* Updating implementation of Milvus document store

* Add latest docstring and tutorial changes

* Adding tests and updating docstrings

* Add latest docstring and tutorial changes

* Fixing issue caught by tests

* Addressing review comments

* Fixing mypy detected issue

* Fixing issue caught in test about sorting of vector ids

* fixing test

* Fixing generator test failure

* update docstrings

* Addressing review comments about multiple network calls while fetching embeddings from the Milvus server

* Add latest docstring and tutorial changes

* Ignoring mypy issue while converting vector_id to int

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Malte Pietsch <malte.pietsch@deepset.ai>
2021-01-29 13:29:12 +01:00

36 lines
2.1 KiB
Python

import pytest
from haystack import Finder
@pytest.mark.slow
@pytest.mark.elasticsearch
@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus"], indirect=True)
@pytest.mark.parametrize("retriever", ["embedding"], indirect=True)
def test_embedding_retriever(retriever, document_store):
    """Check similar-question lookup against documents with question embeddings.

    Eleven documents sharing the same text are written to ``document_store``,
    each carrying an embedding of its ``meta['question']`` computed with
    ``retriever``. A ``Finder`` without a reader is then queried with
    ``top_k_retriever=1`` and must return exactly one answer.
    """
    answer_text = "By running tox in the command line!"
    # One "How to test this library?" entry followed by ten identical
    # "blah blah blah" entries; every entry gets its own dict objects.
    questions = ["How to test this library?"] + ["blah blah blah"] * 10
    documents = [
        {"text": answer_text, "meta": {"name": question, "question": question}}
        for question in questions
    ]

    # Embed one question per call and attach the resulting vector to its document.
    for entry in documents:
        vectors = retriever.embed([entry["meta"]["question"]])
        entry["embedding"] = vectors[0]
    document_store.write_documents(list(documents))

    finder = Finder(reader=None, retriever=retriever)
    prediction = finder.get_answers_via_similar_questions(
        question="How to test this?",
        top_k_retriever=1,
    )
    answers = prediction.get("answers", [])
    assert len(answers) == 1