mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-31 09:49:48 +00:00 
			
		
		
		
	fix: write metadata to SQL Document Store when duplicate_documents!="overwrite" (#3548)
* `add_all` fixes the bug
* improved test
This commit is contained in:
		
							parent
							
								
									6a48ace9b9
								
							
						
					
					
						commit
						9de56b0283
					
				| @ -406,24 +406,23 @@ class SQLDocumentStore(BaseDocumentStore): | ||||
|                         meta_orms.append(MetaDocumentORM(name=key, value=value)) | ||||
|                     except TypeError as ex: | ||||
|                         logger.error("Document %s - %s", doc.id, ex) | ||||
|                 doc_mapping = { | ||||
|                     "id": doc.id, | ||||
|                     "content": doc.to_dict()["content"], | ||||
|                     "content_type": doc.content_type, | ||||
|                     "vector_id": vector_id, | ||||
|                     "meta": meta_orms, | ||||
|                     "index": index, | ||||
|                 } | ||||
|                 doc_orm = DocumentORM( | ||||
|                     id=doc.id, | ||||
|                     content=doc.to_dict()["content"], | ||||
|                     content_type=doc.content_type, | ||||
|                     vector_id=vector_id, | ||||
|                     meta=meta_orms, | ||||
|                     index=index, | ||||
|                 ) | ||||
|                 if duplicate_documents == "overwrite": | ||||
|                     doc_orm = DocumentORM(**doc_mapping) | ||||
|                     # First old meta data cleaning is required | ||||
|                     self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete() | ||||
|                     self.session.merge(doc_orm) | ||||
|                 else: | ||||
|                     docs_orm.append(doc_mapping) | ||||
|                     docs_orm.append(doc_orm) | ||||
| 
 | ||||
|             if docs_orm: | ||||
|                 self.session.bulk_insert_mappings(DocumentORM, docs_orm) | ||||
|                 self.session.add_all(docs_orm) | ||||
| 
 | ||||
|             try: | ||||
|                 self.session.commit() | ||||
|  | ||||
| @ -82,11 +82,12 @@ class DocumentStoreBaseTestAbstract: | ||||
|     @pytest.mark.integration | ||||
|     def test_write_with_duplicate_doc_ids(self, ds): | ||||
|         duplicate_documents = [ | ||||
|             Document(content="Doc1", id_hash_keys=["content"]), | ||||
|             Document(content="Doc1", id_hash_keys=["content"]), | ||||
|             Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}), | ||||
|             Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}), | ||||
|         ] | ||||
|         ds.write_documents(duplicate_documents, duplicate_documents="skip") | ||||
|         assert len(ds.get_all_documents()) == 1 | ||||
|         assert ds.get_all_documents()[0] == duplicate_documents[0] | ||||
|         with pytest.raises(Exception): | ||||
|             ds.write_documents(duplicate_documents, duplicate_documents="fail") | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Stefano Fiorucci
						Stefano Fiorucci