diff --git a/haystack/document_stores/opensearch.py b/haystack/document_stores/opensearch.py index eaa30897a..823a94cd9 100644 --- a/haystack/document_stores/opensearch.py +++ b/haystack/document_stores/opensearch.py @@ -443,10 +443,10 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore): "space_type" ] + # Check if desired index settings are equal to settings in existing index embedding_field_similarity = self.space_type_to_similarity[embedding_field_space_type] - if embedding_field_similarity == self.similarity: - self.embeddings_field_supports_similarity = True - else: + if embedding_field_similarity != self.similarity: + self.embeddings_field_supports_similarity = False logger.warning( f"Embedding field '{self.embedding_field}' is optimized for similarity '{embedding_field_similarity}'. " f"Falling back to slow exact vector calculation. " @@ -455,6 +455,22 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore): f"e.g. `OpenSearchDocumentStore(index='my_new_{self.similarity}_index', similarity='{self.similarity}')`." ) + # Check if desired knn engine is same as engine in existing index + elif ( + "method" in mappings["properties"][self.embedding_field] + and mappings["properties"][self.embedding_field]["method"]["engine"] != self.knn_engine + ): + self.embeddings_field_supports_similarity = False + embedding_field_engine = mappings["properties"][self.embedding_field]["method"]["engine"] + logger.warning( + f"Embedding field '{self.embedding_field}' was initially created with knn_engine " + f"'{embedding_field_engine}', but knn_engine was set to '{self.knn_engine}' when " + f"initializing OpenSearchDocumentStore. " + f"Falling back to slow exact vector calculation." + ) + else: + self.embeddings_field_supports_similarity = True + # Adjust global ef_search setting (nmslib only). If not set, default is 512. ef_search = index_settings.get("knn.algo_param", {"ef_search": 512}).get("ef_search", 512) if self.index_type == "hnsw" and ef_search != 20: diff --git a/test/document_stores/test_opensearch.py b/test/document_stores/test_opensearch.py index 8fc7451c4..83a245b7d 100644 --- a/test/document_stores/test_opensearch.py +++ b/test/document_stores/test_opensearch.py @@ -206,6 +206,20 @@ class TestOpenSearchDocumentStore: # docs with an original embedding should have the new one assert cloned_field_name in meta + @pytest.mark.integration + def test_change_knn_engine(self, ds, caplog): + assert ds.embeddings_field_supports_similarity == True + index_name = ds.index + with caplog.at_level(logging.WARNING): + ds = OpenSearchDocumentStore(port=9201, knn_engine="faiss", index=index_name) + warning = ( + "Embedding field 'embedding' was initially created with knn_engine 'nmslib', but knn_engine was " + "set to 'faiss' when initializing OpenSearchDocumentStore. Falling back to slow exact vector " + "calculation." + ) + assert ds.embeddings_field_supports_similarity == False + assert warning in caplog.text + # Unit tests @pytest.mark.unit