feat: FAISS in OpenSearch: check existing index (#3101)

* Add check for mapping for existing indices

* Add test

* Check if "method" field exists
This commit is contained in:
bogdankostic 2022-08-25 17:33:26 +02:00 committed by GitHub
parent cc9d39c360
commit e2ec0d1c15
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 3 deletions

View File

@ -443,10 +443,10 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
"space_type"
]
# Check if desired index settings are equal to settings in existing index
embedding_field_similarity = self.space_type_to_similarity[embedding_field_space_type]
if embedding_field_similarity == self.similarity:
self.embeddings_field_supports_similarity = True
else:
if embedding_field_similarity != self.similarity:
self.embeddings_field_supports_similarity = False
logger.warning(
f"Embedding field '{self.embedding_field}' is optimized for similarity '{embedding_field_similarity}'. "
f"Falling back to slow exact vector calculation. "
@ -455,6 +455,22 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
f"e.g. `OpenSearchDocumentStore(index='my_new_{self.similarity}_index', similarity='{self.similarity}')`."
)
# Check if desired knn engine is same as engine in existing index
elif (
"method" in mappings["properties"][self.embedding_field]
and mappings["properties"][self.embedding_field]["method"]["engine"] != self.knn_engine
):
self.embeddings_field_supports_similarity = False
embedding_field_engine = mappings["properties"][self.embedding_field]["method"]["engine"]
logger.warning(
f"Embedding field '{self.embedding_field}' was initially created with knn_engine "
f"'{embedding_field_engine}', but knn_engine was set to '{self.knn_engine}' when "
f"initializing OpenSearchDocumentStore. "
f"Falling back to slow exact vector calculation."
)
else:
self.embeddings_field_supports_similarity = True
# Adjust global ef_search setting (nmslib only). If not set, default is 512.
ef_search = index_settings.get("knn.algo_param", {"ef_search": 512}).get("ef_search", 512)
if self.index_type == "hnsw" and ef_search != 20:

View File

@ -206,6 +206,20 @@ class TestOpenSearchDocumentStore:
# docs with an original embedding should have the new one
assert cloned_field_name in meta
@pytest.mark.integration
def test_change_knn_engine(self, ds, caplog):
    """Check that re-opening an index with a different knn_engine is detected.

    A second ``OpenSearchDocumentStore`` is created on the index used by
    ``ds``, this time with ``knn_engine="faiss"``. The expected warning text
    names 'nmslib' as the engine the embedding field was created with, so
    the new store must log that warning and report that its embedding field
    does not support the configured similarity.
    """
    # Precondition: the store handed to the test agrees with its own index.
    assert ds.embeddings_field_supports_similarity is True
    index_name = ds.index

    with caplog.at_level(logging.WARNING):
        # Bound to a new name instead of rebinding `ds`, so it stays clear
        # which store each assertion inspects.
        reopened_store = OpenSearchDocumentStore(port=9201, knn_engine="faiss", index=index_name)

    expected_warning = (
        "Embedding field 'embedding' was initially created with knn_engine 'nmslib', but knn_engine was "
        "set to 'faiss' when initializing OpenSearchDocumentStore. Falling back to slow exact vector "
        "calculation."
    )
    assert reopened_store.embeddings_field_supports_similarity is False
    assert expected_warning in caplog.text
# Unit tests
@pytest.mark.unit