Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-08-31 20:03:38 +00:00)
fix: write metadata to SQL Document Store when duplicate_documents!="overwrite" (#3548)
* add_all fixes the bug
* improved test
parent 6a48ace9b9
commit 9de56b0283
@@ -406,24 +406,23 @@ class SQLDocumentStore(BaseDocumentStore):
                         meta_orms.append(MetaDocumentORM(name=key, value=value))
                     except TypeError as ex:
                         logger.error("Document %s - %s", doc.id, ex)
-                doc_mapping = {
-                    "id": doc.id,
-                    "content": doc.to_dict()["content"],
-                    "content_type": doc.content_type,
-                    "vector_id": vector_id,
-                    "meta": meta_orms,
-                    "index": index,
-                }
+                doc_orm = DocumentORM(
+                    id=doc.id,
+                    content=doc.to_dict()["content"],
+                    content_type=doc.content_type,
+                    vector_id=vector_id,
+                    meta=meta_orms,
+                    index=index,
+                )
                 if duplicate_documents == "overwrite":
-                    doc_orm = DocumentORM(**doc_mapping)
                     # First old meta data cleaning is required
                     self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete()
                     self.session.merge(doc_orm)
                 else:
-                    docs_orm.append(doc_mapping)
+                    docs_orm.append(doc_orm)
 
             if docs_orm:
-                self.session.bulk_insert_mappings(DocumentORM, docs_orm)
+                self.session.add_all(docs_orm)
 
             try:
                 self.session.commit()
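Why the switch matters: SQLAlchemy's bulk_insert_mappings() works on plain column dictionaries and skips relationship cascades, so the MetaDocumentORM objects attached under "meta" were silently dropped whenever duplicate_documents was "skip" or "fail". Building full DocumentORM objects and handing them to session.add_all() lets the ORM's save-update cascade insert the related metadata rows alongside each document. Below is a minimal, self-contained sketch of that difference; the Doc/Meta models are hypothetical stand-ins for DocumentORM/MetaDocumentORM, not Haystack code.

# Minimal sketch (assumes SQLAlchemy 1.4+; Doc/Meta are stand-in models, not Haystack's ORM)
from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base, relationship

Base = declarative_base()

class Doc(Base):
    __tablename__ = "doc"
    id = Column(String(100), primary_key=True)
    content = Column(String)
    meta = relationship("Meta")  # child rows, analogous to DocumentORM.meta -> MetaDocumentORM

class Meta(Base):
    __tablename__ = "meta"
    id = Column(Integer, primary_key=True, autoincrement=True)
    doc_id = Column(String(100), ForeignKey("doc.id"))
    name = Column(String)
    value = Column(String)

engine = create_engine("sqlite://")  # in-memory database for illustration
Base.metadata.create_all(engine)

with Session(engine) as session:
    # Old path: plain dicts + bulk_insert_mappings(). The bulk API bypasses
    # relationship cascades, so no Meta rows can be written this way.
    session.bulk_insert_mappings(Doc, [{"id": "d1", "content": "Doc1"}])
    session.commit()
    print(session.query(Meta).count())  # 0 -> metadata never reaches the table

with Session(engine) as session:
    # New path: full ORM objects + add_all(). The save-update cascade inserts
    # the child Meta rows together with the parent Doc row.
    doc = Doc(id="d2", content="Doc1", meta=[Meta(name="key1", value="value1")])
    session.add_all([doc])
    session.commit()
    print(session.query(Meta).count())  # 1 -> metadata persisted with the document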
@@ -82,11 +82,12 @@ class DocumentStoreBaseTestAbstract:
     @pytest.mark.integration
     def test_write_with_duplicate_doc_ids(self, ds):
         duplicate_documents = [
-            Document(content="Doc1", id_hash_keys=["content"]),
-            Document(content="Doc1", id_hash_keys=["content"]),
+            Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
+            Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
         ]
         ds.write_documents(duplicate_documents, duplicate_documents="skip")
         assert len(ds.get_all_documents()) == 1
+        assert ds.get_all_documents()[0] == duplicate_documents[0]
         with pytest.raises(Exception):
             ds.write_documents(duplicate_documents, duplicate_documents="fail")
 
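For context, a hedged end-to-end sketch of the behavior the strengthened test guards: after the fix, metadata written through the non-"overwrite" paths can be read back from the SQLDocumentStore. This assumes a Haystack 1.x installation; the in-memory SQLite URL is only for illustration.

from haystack.document_stores import SQLDocumentStore
from haystack.schema import Document

ds = SQLDocumentStore(url="sqlite://")  # in-memory database, illustration only
docs = [
    Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
    Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
]
ds.write_documents(docs, duplicate_documents="skip")  # duplicate ids collapse to a single row

stored = ds.get_all_documents()
assert len(stored) == 1
assert stored[0].meta.get("key1") == "value1"  # meta survives the "skip" write path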