mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-24 09:20:13 +00:00
feat: FAISS in OpenSearch: check existing index (#3101)
* Add check for mapping for existing indices
* Add test
* Check if "method" field exists
This commit is contained in:
parent
cc9d39c360
commit
e2ec0d1c15
@ -443,10 +443,10 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
|
||||
"space_type"
|
||||
]
|
||||
|
||||
# Check if desired index settings are equal to settings in existing index
|
||||
embedding_field_similarity = self.space_type_to_similarity[embedding_field_space_type]
|
||||
if embedding_field_similarity == self.similarity:
|
||||
self.embeddings_field_supports_similarity = True
|
||||
else:
|
||||
if embedding_field_similarity != self.similarity:
|
||||
self.embeddings_field_supports_similarity = False
|
||||
logger.warning(
|
||||
f"Embedding field '{self.embedding_field}' is optimized for similarity '{embedding_field_similarity}'. "
|
||||
f"Falling back to slow exact vector calculation. "
|
||||
@ -455,6 +455,22 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
|
||||
f"e.g. `OpenSearchDocumentStore(index='my_new_{self.similarity}_index', similarity='{self.similarity}')`."
|
||||
)
|
||||
|
||||
# Check if desired knn engine is same as engine in existing index
|
||||
elif (
|
||||
"method" in mappings["properties"][self.embedding_field]
|
||||
and mappings["properties"][self.embedding_field]["method"]["engine"] != self.knn_engine
|
||||
):
|
||||
self.embeddings_field_supports_similarity = False
|
||||
embedding_field_engine = mappings["properties"][self.embedding_field]["method"]["engine"]
|
||||
logger.warning(
|
||||
f"Embedding field '{self.embedding_field}' was initially created with knn_engine "
|
||||
f"'{embedding_field_engine}', but knn_engine was set to '{self.knn_engine}' when "
|
||||
f"initializing OpenSearchDocumentStore. "
|
||||
f"Falling back to slow exact vector calculation."
|
||||
)
|
||||
else:
|
||||
self.embeddings_field_supports_similarity = True
|
||||
|
||||
# Adjust global ef_search setting (nmslib only). If not set, default is 512.
|
||||
ef_search = index_settings.get("knn.algo_param", {"ef_search": 512}).get("ef_search", 512)
|
||||
if self.index_type == "hnsw" and ef_search != 20:
|
||||
|
@ -206,6 +206,20 @@ class TestOpenSearchDocumentStore:
|
||||
# docs with an original embedding should have the new one
|
||||
assert cloned_field_name in meta
|
||||
|
||||
@pytest.mark.integration
def test_change_knn_engine(self, ds, caplog):
    """Re-opening an existing index with a different ``knn_engine`` disables fast similarity.

    The fixture store ``ds`` is expected to report
    ``embeddings_field_supports_similarity`` as ``True``. A second
    ``OpenSearchDocumentStore`` is then created on the same index with
    ``knn_engine="faiss"``; it must flip that flag to ``False`` and log a
    warning naming both the engine stored in the index mapping and the
    requested one.
    """
    # Precondition: the fixture-provided store supports the configured similarity.
    assert ds.embeddings_field_supports_similarity is True
    index_name = ds.index

    # Only the constructor call needs to run under the capture context;
    # records collected by caplog remain available afterwards.
    with caplog.at_level(logging.WARNING):
        faiss_ds = OpenSearchDocumentStore(port=9201, knn_engine="faiss", index=index_name)

    warning = (
        "Embedding field 'embedding' was initially created with knn_engine 'nmslib', but knn_engine was "
        "set to 'faiss' when initializing OpenSearchDocumentStore. Falling back to slow exact vector "
        "calculation."
    )
    # Compare by identity rather than `== True/False` (PEP 8); the flag is a plain bool.
    assert faiss_ds.embeddings_field_supports_similarity is False
    assert warning in caplog.text
|
||||
|
||||
# Unit tests
|
||||
|
||||
@pytest.mark.unit
|
||||
|
Loading…
x
Reference in New Issue
Block a user