mirror of
				https://github.com/deepset-ai/haystack.git
				synced 2025-10-30 17:29:29 +00:00 
			
		
		
		
	fix: write metadata to SQL Document Store when duplicate_documents!="overwrite" (#3548)
* `add_all` fixes the bug
* improved test
This commit is contained in:
		
							parent
							
								
									6a48ace9b9
								
							
						
					
					
						commit
						9de56b0283
					
				| @ -406,24 +406,23 @@ class SQLDocumentStore(BaseDocumentStore): | |||||||
|                         meta_orms.append(MetaDocumentORM(name=key, value=value)) |                         meta_orms.append(MetaDocumentORM(name=key, value=value)) | ||||||
|                     except TypeError as ex: |                     except TypeError as ex: | ||||||
|                         logger.error("Document %s - %s", doc.id, ex) |                         logger.error("Document %s - %s", doc.id, ex) | ||||||
|                 doc_mapping = { |                 doc_orm = DocumentORM( | ||||||
|                     "id": doc.id, |                     id=doc.id, | ||||||
|                     "content": doc.to_dict()["content"], |                     content=doc.to_dict()["content"], | ||||||
|                     "content_type": doc.content_type, |                     content_type=doc.content_type, | ||||||
|                     "vector_id": vector_id, |                     vector_id=vector_id, | ||||||
|                     "meta": meta_orms, |                     meta=meta_orms, | ||||||
|                     "index": index, |                     index=index, | ||||||
|                 } |                 ) | ||||||
|                 if duplicate_documents == "overwrite": |                 if duplicate_documents == "overwrite": | ||||||
|                     doc_orm = DocumentORM(**doc_mapping) |  | ||||||
|                     # First old meta data cleaning is required |                     # First old meta data cleaning is required | ||||||
|                     self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete() |                     self.session.query(MetaDocumentORM).filter_by(document_id=doc.id).delete() | ||||||
|                     self.session.merge(doc_orm) |                     self.session.merge(doc_orm) | ||||||
|                 else: |                 else: | ||||||
|                     docs_orm.append(doc_mapping) |                     docs_orm.append(doc_orm) | ||||||
| 
 | 
 | ||||||
|             if docs_orm: |             if docs_orm: | ||||||
|                 self.session.bulk_insert_mappings(DocumentORM, docs_orm) |                 self.session.add_all(docs_orm) | ||||||
| 
 | 
 | ||||||
|             try: |             try: | ||||||
|                 self.session.commit() |                 self.session.commit() | ||||||
|  | |||||||
| @ -82,11 +82,12 @@ class DocumentStoreBaseTestAbstract: | |||||||
|     @pytest.mark.integration |     @pytest.mark.integration | ||||||
|     def test_write_with_duplicate_doc_ids(self, ds): |     def test_write_with_duplicate_doc_ids(self, ds): | ||||||
|         duplicate_documents = [ |         duplicate_documents = [ | ||||||
|             Document(content="Doc1", id_hash_keys=["content"]), |             Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}), | ||||||
|             Document(content="Doc1", id_hash_keys=["content"]), |             Document(content="Doc1", id_hash_keys=["content"], meta={"key1": "value1"}), | ||||||
|         ] |         ] | ||||||
|         ds.write_documents(duplicate_documents, duplicate_documents="skip") |         ds.write_documents(duplicate_documents, duplicate_documents="skip") | ||||||
|         assert len(ds.get_all_documents()) == 1 |         assert len(ds.get_all_documents()) == 1 | ||||||
|  |         assert ds.get_all_documents()[0] == duplicate_documents[0] | ||||||
|         with pytest.raises(Exception): |         with pytest.raises(Exception): | ||||||
|             ds.write_documents(duplicate_documents, duplicate_documents="fail") |             ds.write_documents(duplicate_documents, duplicate_documents="fail") | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Stefano Fiorucci
						Stefano Fiorucci