mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-08 04:56:45 +00:00
Fix Document constructor accepting unused id parameter (#5826)
This commit is contained in:
parent
771113c901
commit
5c04cd6ba2
@ -128,6 +128,7 @@ class SentenceTransformersDocumentEmbedder:
|
||||
for doc, emb in zip(documents, embeddings):
|
||||
doc_as_dict = doc.to_dict()
|
||||
doc_as_dict["embedding"] = emb
|
||||
del doc_as_dict["id"]
|
||||
documents_with_embeddings.append(Document.from_dict(doc_as_dict))
|
||||
|
||||
return {"documents": documents_with_embeddings}
|
||||
|
||||
@ -63,8 +63,7 @@ class Document:
|
||||
Document, consider using `to_dict()`, modifying the dict, and then create a new Document object using
|
||||
`Document.from_dict()`.
|
||||
|
||||
:param id: Unique identifier for the document. Do not provide this value when initializing a document: it will be
|
||||
generated based on the document's attributes (see id_hash_keys).
|
||||
:param id: Unique identifier for the document. Generated based on the document's attributes (see id_hash_keys).
|
||||
:param text: Text of the document, if the document contains text.
|
||||
:param array: Array of numbers associated with the document, if the document contains matrix data like image,
|
||||
audio, video, and such.
|
||||
@ -80,7 +79,7 @@ class Document:
|
||||
:param embedding: Vector representation of the document.
|
||||
"""
|
||||
|
||||
id: str = field(default_factory=str)
|
||||
id: str = field(default_factory=str, init=False)
|
||||
text: Optional[str] = field(default=None)
|
||||
array: Optional[numpy.ndarray] = field(default=None)
|
||||
dataframe: Optional[pandas.DataFrame] = field(default=None)
|
||||
|
||||
@ -270,6 +270,7 @@ class MemoryDocumentStore:
|
||||
doc = all_documents[i]
|
||||
doc_fields = doc.to_dict()
|
||||
doc_fields["score"] = docs_scores[i]
|
||||
del doc_fields["id"]
|
||||
return_document = Document(**doc_fields)
|
||||
return_documents.append(return_document)
|
||||
return return_documents
|
||||
@ -322,6 +323,7 @@ class MemoryDocumentStore:
|
||||
doc_fields["score"] = score
|
||||
if return_embedding is False:
|
||||
doc_fields["embedding"] = None
|
||||
del doc_fields["id"]
|
||||
top_documents.append(Document(**doc_fields))
|
||||
|
||||
return top_documents
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
---
|
||||
preview:
|
||||
- |
|
||||
Remove the `id` parameter from the `Document` constructor: any value passed was ignored and a new id was generated anyway.
|
||||
This is a backward-incompatible change.
|
||||
@ -380,7 +380,6 @@ def test_from_json_custom_decoder():
|
||||
assert doc == Document.from_json(
|
||||
json.dumps(
|
||||
{
|
||||
"id": doc.id,
|
||||
"text": "test text",
|
||||
"array": None,
|
||||
"dataframe": None,
|
||||
|
||||
@ -32,7 +32,7 @@ def test_document_store_class_is_registered():
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_document_store_class_with_documents():
|
||||
doc = Document(id="fake_id", text="This is a document")
|
||||
doc = Document(text="This is a document")
|
||||
MyStore = document_store_class("MyStore", documents=[doc])
|
||||
store = MyStore()
|
||||
assert store.count_documents() == 1
|
||||
@ -49,7 +49,7 @@ def test_document_store_class_with_documents_count():
|
||||
|
||||
@pytest.mark.unit
|
||||
def test_document_store_class_with_documents_and_documents_count():
|
||||
doc = Document(id="fake_id", text="This is a document")
|
||||
doc = Document(text="This is a document")
|
||||
MyStore = document_store_class("MyStore", documents=[doc], documents_count=100)
|
||||
store = MyStore()
|
||||
assert store.count_documents() == 100
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user