Move document_name attribute to meta (#217)

Mirror of https://github.com/deepset-ai/haystack.git
Parent commit: 4c21556a79
This commit:   b886e054a3
@@ -12,10 +12,9 @@ class Document(BaseModel):
         description="id for the source file the document was created from. In the case when a large file is divided "
                     "across multiple Elasticsearch documents, this id can be used to reference original source file.",
     )
-    # name: Optional[str] = Field(None, description="Title of the document")
     question: Optional[str] = Field(None, description="Question text for FAQs.")
     query_score: Optional[float] = Field(None, description="Elasticsearch query score for a retrieved document")
-    meta: Dict[str, Any] = Field({}, description="")
+    meta: Dict[str, Any] = Field({}, description="Meta fields for a document like name, url, or author.")
     tags: Optional[Dict[str, Any]] = Field(None, description="Tags that allow filtering of the data")
@@ -30,8 +29,11 @@ class BaseDocumentStore(ABC):
         """
         Indexes documents for later queries.

-        :param documents: List of dictionaries in the format {"name": "<some-document-name>, "text": "<the-actual-text>"}.
-                          Optionally, further fields can be supplied depending on the child class.
+        :param documents: List of dictionaries.
+                          Default format: {"text": "<the-actual-text>"}
+                          Optionally: Include meta data via {"text": "<the-actual-text>",
+                          "meta":{"name": "<some-document-name>, "author": "somebody", ...}}
+                          It can be used for filtering and is accessible in the responses of the Finder.

         :return: None
         """
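For clarity, a minimal sketch of the document format introduced by this hunk (keys follow the docstring above; the values and the commented store call are only examples):

    # Sketch of the new write_documents() input format: "name" is no longer a
    # top-level key but lives inside "meta" together with other meta fields.
    docs = [
        {"text": "the actual text"},  # minimal document
        {"text": "the actual text",
         "meta": {"name": "some-document-name", "author": "somebody"}},  # with meta data
    ]
    # Any BaseDocumentStore subclass accepts this format:
    # document_store.write_documents(docs)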
@@ -117,9 +117,9 @@ class ElasticsearchDocumentStore(BaseDocumentStore):
         Indexes documents for later queries in Elasticsearch.

         :param documents: List of dictionaries.
-                          Default format: {"name": "<some-document-name>, "text": "<the-actual-text>"}
-                          Optionally: Include meta data via {"name": "<some-document-name>,
-                          "text": "<the-actual-text>", "meta":{"author": "somebody", ...}}
+                          Default format: {"text": "<the-actual-text>"}
+                          Optionally: Include meta data via {"text": "<the-actual-text>",
+                          "meta":{"name": "<some-document-name>, "author": "somebody", ...}}
                           It can be used for filtering and is accessible in the responses of the Finder.
                           Advanced: If you are using your own Elasticsearch mapping, the key names in the dictionary
                           should be changed to what you have set for self.text_field and self.name_field .
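The "Advanced" note above concerns custom Elasticsearch mappings. A hedged sketch of how that could look; the constructor arguments text_field and name_field are assumptions based on the docstring and may differ in this version:

    from haystack.database.elasticsearch import ElasticsearchDocumentStore

    # Assumption: the store exposes text_field / name_field so that documents
    # written with custom key names land in the matching Elasticsearch fields.
    document_store = ElasticsearchDocumentStore(
        host="localhost",
        index="document",
        text_field="content",  # custom key instead of the default "text"
        name_field="title",    # custom key instead of the default "name"
    )
    document_store.write_documents([{"content": "some text", "meta": {"title": "my-doc"}}])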
@@ -18,9 +18,9 @@ class InMemoryDocumentStore(BaseDocumentStore):
         """
         Indexes documents for later queries.

-        :param documents: List of dictionaries in the format {"name": "<some-document-name>, "text": "<the-actual-text>"}.
+        :param documents: List of dictionaries in the format {"text": "<the-actual-text>"}.
                           Optionally, you can also supply "tags": ["one-tag", "another-one"]
-                          or additional meta data via "meta": {"author": "someone", "url":"some-url" ...}
+                          or additional meta data via "meta": {"name": "<some-document-name>, "author": "someone", "url":"some-url" ...}

         :return: None
         """
@@ -30,19 +30,21 @@ class InMemoryDocumentStore(BaseDocumentStore):
             return

         for document in documents:
-            name = document.get("name", None)
-            text = document.get("text", None)
+            text = document["text"]
+            if "meta" not in document.keys():
+                document["meta"] = {}
+            for k, v in document.items():  # put additional fields other than text in meta
+                if k not in ["text", "meta", "tags"]:
+                    document["meta"][k] = v

-            if name is None or text is None:
-                continue
+            if not text:
+                raise Exception("A document cannot have empty text field.")

-            signature = name + text
-
-            hash = hashlib.md5(signature.encode("utf-8")).hexdigest()
+            hash = hashlib.md5(text.encode("utf-8")).hexdigest()

             self.docs[hash] = document

-            tags = document.get('tags', [])
+            tags = document.get("tags", [])

             self._map_tags_to_ids(hash, tags)
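A short usage sketch of the behaviour implemented in this hunk: extra top-level fields are moved into "meta", and the document id becomes the MD5 hash of the text. The assertion values are illustrative:

    from haystack.database.memory import InMemoryDocumentStore

    store = InMemoryDocumentStore()
    # "name" is passed at the top level here and is migrated into "meta" on write
    store.write_documents([{"text": "hello world", "name": "greeting.txt"}])

    docs = store.get_all_documents()
    assert docs[0].text == "hello world"
    assert docs[0].meta["name"] == "greeting.txt"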
@@ -65,12 +67,12 @@ class InMemoryDocumentStore(BaseDocumentStore):
         document = self._convert_memory_hit_to_document(self.docs[id], doc_id=id)
         return document

-    def _convert_memory_hit_to_document(self, hit: Tuple[Any, Any], doc_id: Optional[str] = None) -> Document:
+    def _convert_memory_hit_to_document(self, hit: Dict[str, Any], doc_id: Optional[str] = None) -> Document:
         document = Document(
             id=doc_id,
-            text=hit[0].get('text', None),
-            meta=hit[0].get('meta', {}),
-            query_score=hit[1],
+            text=hit.get("text", None),
+            meta=hit.get("meta", {}),
+            query_score=hit.get("query_score", None),
         )
         return document
@@ -89,14 +91,21 @@ class InMemoryDocumentStore(BaseDocumentStore):
                             "use a different DocumentStore (e.g. ElasticsearchDocumentStore).")

         if self.embedding_field is None:
-            return []
+            raise Exception(
+                "To use query_by_embedding() 'embedding field' must "
+                "be specified when initializing the document store."
+            )

         if query_emb is None:
             return []

-        candidate_docs = [self._convert_memory_hit_to_document(
-            (doc, dot(query_emb, doc[self.embedding_field]) / (norm(query_emb) * norm(doc[self.embedding_field]))), doc_id=idx) for idx, doc in self.docs.items()
-        ]
+        candidate_docs = []
+        for idx, hit in self.docs.items():
+            hit["query_score"] = dot(query_emb, hit[self.embedding_field]) / (
+                norm(query_emb) * norm(hit[self.embedding_field])
+            )
+            _doc = self._convert_memory_hit_to_document(hit=hit, doc_id=idx)
+            candidate_docs.append(_doc)

         return sorted(candidate_docs, key=lambda x: x.query_score, reverse=True)[0:top_k]
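The rewritten loop above scores each stored document by cosine similarity between the query embedding and the stored embedding, then sorts by query_score. A standalone sketch of that computation with illustrative values:

    import numpy as np

    def cosine_similarity(query_emb: np.ndarray, doc_emb: np.ndarray) -> float:
        # dot(q, d) / (|q| * |d|), the same expression assigned to "query_score" above
        return float(np.dot(query_emb, doc_emb) / (np.linalg.norm(query_emb) * np.linalg.norm(doc_emb)))

    query_emb = np.array([1.0, 0.0, 1.0])
    doc_emb = np.array([0.5, 0.5, 1.0])
    print(round(cosine_similarity(query_emb, doc_emb), 3))  # 0.866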
@@ -139,4 +148,7 @@ class InMemoryDocumentStore(BaseDocumentStore):
         return len(self.docs.items())

     def get_all_documents(self) -> List[Document]:
-        return [Document(id=item[0], text=item[1]['text'], name=item[1]['name'], meta=item[1].get('meta', {})) for item in self.docs.items()]
+        return [
+            Document(id=item[0], text=item[1]["text"], meta=item[1].get("meta", {}))
+            for item in self.docs.items()
+        ]
@@ -20,7 +20,6 @@ class ORMBase(Base):
 class Document(ORMBase):
     __tablename__ = "document"

-    name = Column(String)
     text = Column(String)
     meta_data = Column(PickleType)

@@ -96,14 +95,19 @@ class SQLDocumentStore(BaseDocumentStore):
         """
         Indexes documents for later queries.

-        :param documents: List of dictionaries in the format {"name": "<some-document-name>, "text": "<the-actual-text>"}.
+        :param documents: List of dictionaries in the format {"text": "<the-actual-text>"}.
                           Optionally, you can also supply meta data via "meta": {"author": "someone", "url":"some-url" ...}

         :return: None
         """

         for doc in documents:
-            row = Document(name=doc["name"], text=doc["text"], meta_data=doc.get("meta", {}))
+            if "meta" not in doc.keys():
+                doc["meta"] = {}
+            for k, v in doc.items():  # put additional fields other than text in meta
+                if k not in ["text", "meta", "tags"]:
+                    doc["meta"][k] = v
+            row = Document(text=doc["text"], meta_data=doc.get("meta", {}))
             self.session.add(row)
         self.session.commit()
@@ -48,9 +48,9 @@ def convert_files_to_dicts(dir_path: str, clean_func: Optional[Callable] = None,
             for para in text.split("\n\n"):
                 if not para.strip():  # skip empty paragraphs
                     continue
-                documents.append({"name": path.name, "text": para})
+                documents.append({"text": para, "meta": {"name": path.name}})
         else:
-            documents.append({"name": path.name, "text": text})
+            documents.append({"text": text, "meta": {"name": path.name}})

     return documents
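A brief usage sketch for the change above: the dictionaries returned by convert_files_to_dicts() now carry the file name under "meta", so they can be passed straight to write_documents(). The directory path and file name shown are illustrative:

    from haystack.indexing.utils import convert_files_to_dicts

    dicts = convert_files_to_dicts(dir_path="samples/docs")
    # e.g. [{"text": "...file or paragraph text...", "meta": {"name": "doc_1.txt"}}, ...]
    # document_store.write_documents(dicts)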
@@ -54,9 +54,10 @@ def xpdf_fixture():
 @pytest.fixture()
 def test_docs_xs():
     return [
-        {"name": "filename1", "text": "My name is Carla and I live in Berlin", "meta": {"meta_field": "test1"}},
-        {"name": "filename2", "text": "My name is Paul and I live in New York", "meta": {"meta_field": "test2"}},
-        {"name": "filename3", "text": "My name is Christelle and I live in Paris", "meta": {"meta_field": "test3"}}
+        {"text": "My name is Carla and I live in Berlin", "meta": {"meta_field": "test1", "name": "filename1"}},
+        {"text": "My name is Paul and I live in New York", "meta": {"meta_field": "test2", "name": "filename2"}},
+        {"text": "My name is Christelle and I live in Paris", "meta_field": "test3", "meta": {"name": "filename3"}}
+        # last doc has meta_field at the top level for backward compatibility
     ]

@@ -1,27 +1,12 @@
-from time import sleep
-
-from haystack.database.elasticsearch import ElasticsearchDocumentStore
-from haystack.database.sql import SQLDocumentStore
-from haystack.indexing.utils import convert_files_to_dicts
+from haystack.database.base import Document


-def test_sql_write_read():
-    sql_document_store = SQLDocumentStore()
-    documents = convert_files_to_dicts(dir_path="samples/docs")
-    sql_document_store.write_documents(documents)
-    documents = sql_document_store.get_all_documents()
-    assert len(documents) == 2
-    doc = sql_document_store.get_document_by_id("1")
-    assert doc.id
-    assert doc.text
-
-
-def test_elasticsearch_write_read(elasticsearch_fixture):
-    document_store = ElasticsearchDocumentStore()
-    documents = convert_files_to_dicts(dir_path="samples/docs")
-    document_store.write_documents(documents)
-    sleep(2)  # wait for documents to be available for query
-    documents = document_store.get_all_documents()
-    assert len(documents) == 2
-    assert documents[0].id
-    assert documents[0].text
+def test_get_all_documents(document_store_with_docs):
+    documents = document_store_with_docs.get_all_documents()
+    assert all(isinstance(d, Document) for d in documents)
+    assert len(documents) == 3
+    assert {d.meta["name"] for d in documents} == {"filename1", "filename2", "filename3"}
+    assert {d.meta["meta_field"] for d in documents} == {"test1", "test2", "test3"}
+    doc = document_store_with_docs.get_document_by_id(documents[0].id)
+    assert doc.id == documents[0].id
+    assert doc.text == documents[0].text
@@ -1,20 +1,8 @@
-from haystack.retriever.dpr_utils import download_dpr
-
-def test_dpr_passage_encoder():
-    from haystack.retriever.dense import DensePassageRetriever
-
-    passage = ["Let's encode this one"]
-    retriever = DensePassageRetriever(document_store=None, embedding_model="dpr-bert-base-nq", gpu=False)
-    emb = retriever.embed_passages(passage)[0]
-    assert(emb.shape[0] == 768)
-    assert(emb[0]-0.52872 < 0.001)
+from haystack.database.memory import InMemoryDocumentStore
+from haystack.retriever.dense import DensePassageRetriever


 def test_dpr_inmemory_retrieval():
-
-    from haystack.database.memory import InMemoryDocumentStore
-    from haystack.retriever.dense import DensePassageRetriever
-
     document_store = InMemoryDocumentStore(embedding_field="embedding")

     documents = [
@@ -27,8 +15,13 @@ def test_dpr_inmemory_retrieval():

     embedded = []
     for doc in documents:
-        doc['embedding'] = retriever.embed_passages([doc['text']])[0]
+        embedding = retriever.embed_passages([doc['text']])[0]
+        doc['embedding'] = embedding
         embedded.append(doc)

+    assert (embedding.shape[0] == 768)
+    assert (embedding[0] - 0.52872 < 0.001)
+
     document_store.write_documents(embedded)

     res = retriever.retrieve(query="Which philosopher attacked Schopenhauer?")
@@ -9,17 +9,17 @@ def test_faq_retriever_in_memory_store():
     document_store = InMemoryDocumentStore(embedding_field="embedding")

     documents = [
-        {'name': 'How to test this library?', 'text': 'By running tox in the command line!', 'meta': {'question': 'How to test this library?'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
-        {'name': 'blah blah blah', 'text': 'By running tox in the command line!', 'meta': {'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'How to test this library?', 'question': 'How to test this library?'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
+        {'text': 'By running tox in the command line!', 'meta': {'name': 'blah blah blah', 'question': 'blah blah blah'}},
     ]

     retriever = EmbeddingRetriever(document_store=document_store, embedding_model="deepset/sentence_bert", gpu=False)
@@ -5,9 +5,9 @@ from haystack.retriever.sparse import TfidfRetriever

 def test_finder_get_answers_with_in_memory_store():
     test_docs = [
-        {"name": "testing the finder 1", "text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url'}},
-        {"name": "testing the finder 2", "text": "testing the finder with pyhton unit test 2", 'meta': {'url': 'url'}},
-        {"name": "testing the finder 3", "text": "testing the finder with pyhton unit test 3", 'meta': {'url': 'url'}}
+        {"text": "testing the finder with pyhton unit test 1", 'meta': {"name": "testing the finder 1", 'url': 'url'}},
+        {"text": "testing the finder with pyhton unit test 2", 'meta': {"name": "testing the finder 2", 'url': 'url'}},
+        {"text": "testing the finder with pyhton unit test 3", 'meta': {"name": "testing the finder 3", 'url': 'url'}}
     ]

     from haystack.database.memory import InMemoryDocumentStore
@@ -25,9 +25,9 @@ def test_finder_get_answers_with_in_memory_store():

 def test_memory_store_get_by_tags():
     test_docs = [
-        {"name": "testing the finder 1", "text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url'}},
-        {"name": "testing the finder 2", "text": "testing the finder with pyhton unit test 2", 'meta': {'url': None}},
-        {"name": "testing the finder 3", "text": "testing the finder with pyhton unit test 3", 'meta': {'url': 'url'}}
+        {"text": "testing the finder with pyhton unit test 1", 'meta': {"name": "testing the finder 1", 'url': 'url'}},
+        {"text": "testing the finder with pyhton unit test 2", 'meta': {"name": "testing the finder 2", 'url': None}},
+        {"text": "testing the finder with pyhton unit test 3", 'meta': {"name": "testing the finder 3", 'url': 'url'}}
     ]

     from haystack.database.memory import InMemoryDocumentStore
@@ -41,9 +41,9 @@ def test_memory_store_get_by_tags():

 def test_memory_store_get_by_tag_lists_union():
     test_docs = [
-        {"name": "testing the finder 1", "text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url'}, 'tags': [{'tag2': ["1"]}]},
-        {"name": "testing the finder 2", "text": "testing the finder with pyhton unit test 2", 'meta': {'url': None}, 'tags': [{'tag1': ['1']}]},
-        {"name": "testing the finder 3", "text": "testing the finder with pyhton unit test 3", 'meta': {'url': 'url'}, 'tags': [{'tag2': ["1", "2"]}]}
+        {"text": "testing the finder with pyhton unit test 1", 'meta': {"name": "testing the finder 1", 'url': 'url'}, 'tags': [{'tag2': ["1"]}]},
+        {"text": "testing the finder with pyhton unit test 2", 'meta': {"name": "testing the finder 2", 'url': None}, 'tags': [{'tag1': ['1']}]},
+        {"text": "testing the finder with pyhton unit test 3", 'meta': {"name": "testing the finder 3", 'url': 'url'}, 'tags': [{'tag2': ["1", "2"]}]}
     ]

     from haystack.database.memory import InMemoryDocumentStore
@@ -53,14 +53,14 @@ def test_memory_store_get_by_tag_lists_union():
     docs = document_store.get_document_ids_by_tags({'tag2': ["1"]})

     assert docs == [
-        {'name': 'testing the finder 1', 'text': 'testing the finder with pyhton unit test 1', 'meta': {'url': 'url'}, 'tags': [{'tag2': ['1']}]},
-        {'name': 'testing the finder 3', 'text': 'testing the finder with pyhton unit test 3', 'meta': {'url': 'url'}, 'tags': [{'tag2': ['1', '2']}]}
+        {'text': 'testing the finder with pyhton unit test 1', 'meta': {'name': 'testing the finder 1', 'url': 'url'}, 'tags': [{'tag2': ['1']}]},
+        {'text': 'testing the finder with pyhton unit test 3', 'meta': {'name': 'testing the finder 3', 'url': 'url'}, 'tags': [{'tag2': ['1', '2']}]}
     ]


 def test_memory_store_get_by_tag_lists_non_existent_tag():
     test_docs = [
-        {"name": "testing the finder 1", "text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url'}, 'tags': [{'tag1': ["1"]}]},
+        {"text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url', "name": "testing the finder 1"}, 'tags': [{'tag1': ["1"]}]},
     ]
     from haystack.database.memory import InMemoryDocumentStore
     document_store = InMemoryDocumentStore()
@@ -71,10 +71,10 @@ def test_memory_store_get_by_tag_lists_non_existent_tag():

 def test_memory_store_get_by_tag_lists_disjoint():
     test_docs = [
-        {"name": "testing the finder 1", "text": "testing the finder with pyhton unit test 1", 'meta': {'url': 'url'}, 'tags': [{'tag1': ["1"]}]},
-        {"name": "testing the finder 2", "text": "testing the finder with pyhton unit test 2", 'meta': {'url': None}, 'tags': [{'tag2': ['1']}]},
-        {"name": "testing the finder 3", "text": "testing the finder with pyhton unit test 3", 'meta': {'url': 'url'}, 'tags': [{'tag3': ["1", "2"]}]},
-        {"name": "testing the finder 4", "text": "testing the finder with pyhton unit test 3", 'meta': {'url': 'url'}, 'tags': [{'tag3': ["1", "3"]}]}
+        {"text": "testing the finder with pyhton unit test 1", 'meta': {"name": "testing the finder 1", 'url': 'url'}, 'tags': [{'tag1': ["1"]}]},
+        {"text": "testing the finder with pyhton unit test 2", 'meta': {"name": "testing the finder 2", 'url': None}, 'tags': [{'tag2': ['1']}]},
+        {"text": "testing the finder with pyhton unit test 3", 'meta': {"name": "testing the finder 3", 'url': 'url'}, 'tags': [{'tag3': ["1", "2"]}]},
+        {"text": "testing the finder with pyhton unit test 3", 'meta': {"name": "testing the finder 4", 'url': 'url'}, 'tags': [{'tag3': ["1", "3"]}]}
     ]

     from haystack.database.memory import InMemoryDocumentStore
@@ -83,4 +83,4 @@ def test_memory_store_get_by_tag_lists_disjoint():

     docs = document_store.get_document_ids_by_tags({'tag3': ["3"]})

-    assert docs == [{'name': 'testing the finder 4', 'text': 'testing the finder with pyhton unit test 3', 'meta': {'url': 'url'}, 'tags': [{'tag3': ['1', '3']}]}]
+    assert docs == [{'text': 'testing the finder with pyhton unit test 3', 'meta': {'name': 'testing the finder 4', 'url': 'url'}, 'tags': [{'tag3': ['1', '3']}]}]
@@ -12,7 +12,7 @@ def test_reader_basic(reader):
 def test_output(reader, test_docs_xs):
     docs = []
     for d in test_docs_xs:
-        doc = Document(id=d["name"], text=d["text"], meta=d["meta"])
+        doc = Document(id=d["meta"]["name"], text=d["text"], meta=d["meta"])
         docs.append(doc)
     results = reader.predict(question="Who lives in Berlin?", documents=docs, top_k=5)
     assert results is not None
@@ -16,4 +16,13 @@ def test_tfidf_retriever():

     retriever = TfidfRetriever(document_store)
     retriever.fit()
-    assert retriever.retrieve("godzilla", top_k=1) == [Document(id='0', text='godzilla says hello', external_source_id=None, question=None, query_score=None, meta={})]
+    assert retriever.retrieve("godzilla", top_k=1) == [
+        Document(
+            id='0',
+            text='godzilla says hello',
+            external_source_id=None,
+            question=None,
+            query_score=None,
+            meta={"name": "testing the finder 1"},
+        )
+    ]