Fix vector_id collision in FAISS (#1961)

* Fix FAISS vector_id count

* Fix mypy errors

Co-authored-by: Yorick van Zweeden <git@yorickvanzweeden.nl>
This commit is contained in:
Yorick van Zweeden 2022-01-05 18:10:47 +01:00 committed by GitHub
parent 0b0b9689a4
commit 65cd39b533
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -305,7 +305,7 @@ class FAISSDocumentStore(SQLDocumentStore):
return return
logger.info(f"Updating embeddings for {document_count} docs...") logger.info(f"Updating embeddings for {document_count} docs...")
vector_id = self.faiss_indexes[index].ntotal vector_id = sum([self.faiss_indexes[index].ntotal for index in self.faiss_indexes.keys()])
result = self._query( result = self._query(
index=index, index=index,
@@ -329,7 +329,7 @@ class FAISSDocumentStore(SQLDocumentStore):
vector_id_map = {} vector_id_map = {}
for doc in document_batch: for doc in document_batch:
vector_id_map[doc.id] = vector_id vector_id_map[str(doc.id)] = str(vector_id)
vector_id += 1 vector_id += 1
self.update_vector_ids(vector_id_map, index=index) self.update_vector_ids(vector_id_map, index=index)
progress_bar.set_description_str("Documents Processed") progress_bar.set_description_str("Documents Processed")