fix: write metadata to SQL Document Store when duplicate_documents!="overwrite" (#3548)

* add_all fixes the bug

* improved test
This commit is contained in:
Stefano Fiorucci 2022-11-15 10:04:04 +01:00 committed by GitHub
parent 6a48ace9b9
commit 9de56b0283
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 13 deletions

View File

@@ -406,24 +406,23 @@ class SQLDocumentStore(BaseDocumentStore):
meta_orms.append(MetaDocumentORM(name=key, value=value))
except TypeError as ex:
logger.error("Document %s - %s", doc.id, ex)
doc_mapping = {
"id": doc.id,
"content": doc.to_dict()["content"],
"content_type": doc.content_type,
"vector_id": vector_id,
"meta": meta_orms,
"index": index,
}
doc_orm = DocumentORM(
id=doc.id,
content=doc.to_dict()["content"],
content_type=doc.content_type,
vector_id=vector_id,
meta=meta_orms,
index=index,
)
if duplicate_documents == "overwrite":
doc_orm = DocumentORM(**doc_mapping)
# First old meta data cleaning is required
self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete()
self.session.merge(doc_orm)
else:
docs_orm.append(doc_mapping)
docs_orm.append(doc_orm)
if docs_orm:
self.session.bulk_insert_mappings(DocumentORM, docs_orm)
self.session.add_all(docs_orm)
try:
self.session.commit()

View File

@@ -82,11 +82,12 @@ class DocumentStoreBaseTestAbstract:
@pytest.mark.integration
def test_write_with_duplicate_doc_ids(self, ds):
duplicate_documents = [
Document(content="Doc1", id_hash_keys=["content"]),
Document(content="Doc1", id_hash_keys=["content"]),
Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}),
]
ds.write_documents(duplicate_documents, duplicate_documents="skip")
assert len(ds.get_all_documents()) == 1
assert ds.get_all_documents()[0] == duplicate_documents[0]
with pytest.raises(Exception):
ds.write_documents(duplicate_documents, duplicate_documents="fail")