mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-31 11:56:35 +00:00
fix: write metadata to SQL Document Store when duplicate_documents!="overwrite" (#3548)
* add_all fixes the bug
* improved test
This commit is contained in:
parent
6a48ace9b9
commit
9de56b0283
@ -406,24 +406,23 @@ class SQLDocumentStore(BaseDocumentStore):
|
||||
meta_orms.append(MetaDocumentORM(name=key, value=value))
|
||||
except TypeError as ex:
|
||||
logger.error("Document %s - %s", doc.id, ex)
|
||||
doc_mapping = {
|
||||
"id": doc.id,
|
||||
"content": doc.to_dict()["content"],
|
||||
"content_type": doc.content_type,
|
||||
"vector_id": vector_id,
|
||||
"meta": meta_orms,
|
||||
"index": index,
|
||||
}
|
||||
doc_orm = DocumentORM(
|
||||
id=doc.id,
|
||||
content=doc.to_dict()["content"],
|
||||
content_type=doc.content_type,
|
||||
vector_id=vector_id,
|
||||
meta=meta_orms,
|
||||
index=index,
|
||||
)
|
||||
if duplicate_documents == "overwrite":
|
||||
doc_orm = DocumentORM(**doc_mapping)
|
||||
# First old meta data cleaning is required
|
||||
self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete()
|
||||
self.session.merge(doc_orm)
|
||||
else:
|
||||
docs_orm.append(doc_mapping)
|
||||
docs_orm.append(doc_orm)
|
||||
|
||||
if docs_orm:
|
||||
self.session.bulk_insert_mappings(DocumentORM, docs_orm)
|
||||
self.session.add_all(docs_orm)
|
||||
|
||||
try:
|
||||
self.session.commit()
|
||||
|
@ -82,11 +82,12 @@ class DocumentStoreBaseTestAbstract:
|
||||
@pytest.mark.integration
|
||||
def test_write_with_duplicate_doc_ids(self, ds):
|
||||
duplicate_documents = [
|
||||
Document(content="Doc1", id_hash_keys=["content"]),
|
||||
Document(content="Doc1", id_hash_keys=["content"]),
|
||||
Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
|
||||
Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
|
||||
]
|
||||
ds.write_documents(duplicate_documents, duplicate_documents="skip")
|
||||
assert len(ds.get_all_documents()) == 1
|
||||
assert ds.get_all_documents()[0] == duplicate_documents[0]
|
||||
with pytest.raises(Exception):
|
||||
ds.write_documents(duplicate_documents, duplicate_documents="fail")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user