2020-04-28 07:10:32 -07:00
|
|
|
from haystack.database.base import Document
|
|
|
|
|
|
|
|
|
|
|
|
def test_tfidf_retriever():
    """Check the top TF-IDF hit for a one-word query against a known document.

    Three small documents are written to an ``InMemoryDocumentStore``, a
    ``TfidfRetriever`` is fitted on that store, and the single result
    returned for the query "godzilla" is compared with the expected
    ``Document``.
    """
    from haystack.retriever.sparse import TfidfRetriever
    from haystack.database.memory import InMemoryDocumentStore

    sample_docs = [
        {"name": "testing the finder 1", "text": "godzilla says hello"},
        {"name": "testing the finder 2", "text": "optimus prime says bye"},
        {"name": "testing the finder 3", "text": "alien says arghh"},
    ]

    store = InMemoryDocumentStore()
    store.write_documents(sample_docs)

    tfidf = TfidfRetriever(store)
    tfidf.fit()

    # Only the best-scoring document is requested.
    hits = tfidf.retrieve("godzilla", top_k=1)

    # NOTE(review): the hard-coded id is presumably what the store assigns to
    # this document; confirm how it is derived before changing the fixture.
    expected = Document(
        id='26f84672c6d7aaeb8e2cd53e9c62d62d',
        text='godzilla says hello',
        external_source_id=None,
        question=None,
        query_score=None,
        meta={"name": "testing the finder 1"},
    )

    assert hits == [expected]
|