mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-06 12:07:04 +00:00
Fix embeddings_field_supports_similarity of OpenSearchDocumentStore when creating index (#3030)
* fix embeddings_field_supports_similarity when creating index
* fix test
This commit is contained in:
parent
26c938a8e6
commit
668fd548a6
@ -427,26 +427,21 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
|
||||
embedding_field_space_type = index_settings["knn.space_type"]
|
||||
# embedding field with local space_type setting
|
||||
else:
|
||||
# embedding field with global space_type setting
|
||||
if "method" not in mappings["properties"][self.embedding_field]:
|
||||
embedding_field_space_type = index_settings["knn.space_type"]
|
||||
# embedding field with local space_type setting
|
||||
else:
|
||||
embedding_field_space_type = mappings["properties"][self.embedding_field]["method"][
|
||||
"space_type"
|
||||
]
|
||||
embedding_field_space_type = mappings["properties"][self.embedding_field]["method"][
|
||||
"space_type"
|
||||
]
|
||||
|
||||
embedding_field_similarity = self.space_type_to_similarity[embedding_field_space_type]
|
||||
if embedding_field_similarity == self.similarity:
|
||||
self.embeddings_field_supports_similarity = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"Embedding field '{self.embedding_field}' is optimized for similarity '{embedding_field_similarity}'. "
|
||||
f"Falling back to slow exact vector calculation. "
|
||||
f"Consider cloning the embedding field optimized for '{embedding_field_similarity}' by calling clone_embedding_field(similarity='{embedding_field_similarity}', ...) "
|
||||
f"or creating a new index optimized for '{self.similarity}' by setting `similarity='{self.similarity}'` the first time you instantiate OpenSearchDocumentStore for the new index, "
|
||||
f"e.g. `OpenSearchDocumentStore(index='my_new_{self.similarity}_index', similarity='{self.similarity}')`."
|
||||
)
|
||||
embedding_field_similarity = self.space_type_to_similarity[embedding_field_space_type]
|
||||
if embedding_field_similarity == self.similarity:
|
||||
self.embeddings_field_supports_similarity = True
|
||||
else:
|
||||
logger.warning(
|
||||
f"Embedding field '{self.embedding_field}' is optimized for similarity '{embedding_field_similarity}'. "
|
||||
f"Falling back to slow exact vector calculation. "
|
||||
f"Consider cloning the embedding field optimized for '{embedding_field_similarity}' by calling clone_embedding_field(similarity='{embedding_field_similarity}', ...) "
|
||||
f"or creating a new index optimized for '{self.similarity}' by setting `similarity='{self.similarity}'` the first time you instantiate OpenSearchDocumentStore for the new index, "
|
||||
f"e.g. `OpenSearchDocumentStore(index='my_new_{self.similarity}_index', similarity='{self.similarity}')`."
|
||||
)
|
||||
|
||||
# Adjust global ef_search setting. If not set, default is 512.
|
||||
ef_search = index_settings.get("knn.algo_param", {"ef_search": 512}).get("ef_search", 512)
|
||||
@ -498,6 +493,7 @@ class OpenSearchDocumentStore(BaseElasticsearchDocumentStore):
|
||||
)
|
||||
|
||||
try:
|
||||
self.embeddings_field_supports_similarity = True
|
||||
self.client.indices.create(index=index_name, body=index_definition, headers=headers)
|
||||
except RequestError as e:
|
||||
# With multiple workers we need to avoid race conditions, where:
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import sys
|
||||
import logging
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
@ -390,9 +389,7 @@ class TestOpenSearchDocumentStore:
|
||||
mocked_document_store.embedding_field = "vec"
|
||||
|
||||
mocked_document_store._create_document_index(self.index_name)
|
||||
# FIXME: when `method` is missing from the field mapping, embeddings_field_supports_similarity is always
|
||||
# False but I'm not sure this is by design
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is False
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is True
|
||||
|
||||
@pytest.mark.unit
|
||||
def test__create_document_index_with_existing_mapping_similarity(self, mocked_document_store, index):
|
||||
@ -494,6 +491,7 @@ class TestOpenSearchDocumentStore:
|
||||
mocked_document_store._create_document_index(self.index_name)
|
||||
_, kwargs = mocked_document_store.client.indices.create.call_args
|
||||
assert kwargs["body"] == {"mappings": {"properties": {"a_number": {"type": "integer"}}}}
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is True
|
||||
|
||||
@pytest.mark.unit
|
||||
def test__create_document_index_no_index_no_mapping(self, mocked_document_store):
|
||||
@ -522,6 +520,7 @@ class TestOpenSearchDocumentStore:
|
||||
},
|
||||
"settings": {"analysis": {"analyzer": {"default": {"type": "standard"}}}, "index": {"knn": True}},
|
||||
}
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is True
|
||||
|
||||
@pytest.mark.unit
|
||||
def test__create_document_index_no_index_no_mapping_with_synonyms(self, mocked_document_store):
|
||||
@ -563,6 +562,7 @@ class TestOpenSearchDocumentStore:
|
||||
"index": {"knn": True},
|
||||
},
|
||||
}
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is True
|
||||
|
||||
@pytest.mark.unit
|
||||
def test__create_document_index_no_index_no_mapping_with_embedding_field(self, mocked_document_store):
|
||||
@ -597,6 +597,7 @@ class TestOpenSearchDocumentStore:
|
||||
"index": {"knn": True, "knn.algo_param.ef_search": 20},
|
||||
},
|
||||
}
|
||||
assert mocked_document_store.embeddings_field_supports_similarity is True
|
||||
|
||||
@pytest.mark.unit
|
||||
def test__create_document_index_client_failure(self, mocked_document_store):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user