2020-12-14 18:15:44 +01:00
|
|
|
import pytest
|
2020-10-14 16:15:04 +02:00
|
|
|
|
|
|
|
|
2020-12-14 18:15:44 +01:00
|
|
|
@pytest.mark.parametrize("retriever", ["tfidf"], indirect=True)
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
def test_tfidf_retriever(document_store, retriever):
    """Check that the retriever returns the matching document for "godzilla".

    Three small documents are written to ``document_store``, the retriever is
    fitted, and the single top-ranked hit for the query "godzilla" is compared
    against the first document's id, text and remaining metadata.
    """
    expected_id = "26f84672c6d7aaeb8e2cd53e9c62d62d"
    expected_text = "godzilla says hello"

    # Only the first document is given an explicit id; the other two are
    # written without one.
    godzilla_doc = {"id": expected_id, "name": "testing the finder 1", "text": expected_text}
    optimus_doc = {"name": "testing the finder 2", "text": "optimus prime says bye"}
    alien_doc = {"name": "testing the finder 3", "text": "alien says arghh"}
    document_store.write_documents([godzilla_doc, optimus_doc, alien_doc])

    # Fit only after the documents have been written to the store.
    retriever.fit()

    hits = retriever.retrieve("godzilla", top_k=1)
    top_hit = hits[0]

    assert top_hit.id == expected_id
    assert top_hit.text == expected_text
    # The "name" field is expected to surface as the document's meta.
    assert top_hit.meta == {"name": "testing the finder 1"}
|