import os
import logging

from unittest.mock import MagicMock, patch

import pytest
import numpy as np

import opensearchpy

from haystack.document_stores.opensearch import (
    OpenSearch,
    OpenSearchDocumentStore,
    RequestsHttpConnection,
    Urllib3HttpConnection,
    RequestError,
    tqdm,
)
from haystack.errors import DocumentStoreError
from haystack.testing import DocumentStoreBaseTestAbstract

from .test_search_engine import SearchEngineDocumentStoreTestAbstract


class TestOpenSearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract):
    # Constants
    query_emb = np.random.random_sample(size=(2, 2))
    index_name = __name__

    # Fixtures

    @pytest.fixture
    def ds(self):
        """
        Yield an OpenSearchDocumentStore bound to the document and label indices of this test class.

        Both indices are requested with `recreate_index=True`. No explicit teardown runs after the
        `yield`, so keeping the cluster clean presumably relies on that recreation at setup time.
        """
        store_kwargs = {
            "index": self.index_name,
            "label_index": f"{self.index_name}_labels",
            # The host can be overridden from the environment, e.g. when running in CI
            "host": os.environ.get("OPENSEARCH_HOST", "localhost"),
            "create_index": True,
            "recreate_index": True,
        }

        yield OpenSearchDocumentStore(**store_kwargs)

    @pytest.fixture
    def mocked_document_store(self, existing_index):
        """
        Build an OpenSearchDocumentStore subclass instance whose `_init_client` returns a MagicMock client.

        The mocked client reports that the index exists, returns the `existing_index` fixture for
        `indices.get` and answers `info()` with version "1.3.5".
        """

        class DSMock(OpenSearchDocumentStore):
            # Patch a throwaway subclass so the real class object stays untouched
            pass

        client_mock = MagicMock()
        client_mock.info.return_value = {"version": {"number": "1.3.5"}}
        client_mock.indices.get.return_value = {self.index_name: existing_index}
        client_mock.indices.exists.return_value = True

        DSMock._init_client = MagicMock(return_value=client_mock)
        return DSMock()

    @pytest.fixture
    def mocked_open_search_init(self, monkeypatch):
        """
        Replace `OpenSearch.__init__` with a mock returning None and hand that mock to the test.
        """
        init_mock = MagicMock()
        init_mock.return_value = None
        monkeypatch.setattr(OpenSearch, "__init__", init_mock)
        return init_mock

    @pytest.fixture
    def _init_client_params(self):
        """
        Return a fresh dict of keyword arguments accepted by OpenSearchDocumentStore._init_client.

        Tests are free to mutate the returned dict before passing it on.
        """
        params = {
            "host": "localhost",
            "port": 9999,
            "username": "user",
            "password": "pass",
            "aws4auth": None,
            "scheme": "http",
            "ca_certs": "ca_certs",
            "verify_certs": True,
            "timeout": 42,
            "use_system_proxy": True,
        }
        return params

    @pytest.fixture
    def existing_index(self):
        """
        Return a dict shaped like an index description: aliases, mappings and settings.

        The mapping holds a text `content` field and a 768-dimensional `knn_vector` field named
        `embedding` that uses nmslib / hnsw / innerproduct.
        """
        embedding_mapping = {
            "type": "knn_vector",
            "dimension": 768,
            "method": {
                "engine": "nmslib",
                "space_type": "innerproduct",
                "name": "hnsw",
                "parameters": {"ef_construction": 512, "m": 16},
            },
        }
        index_settings = {
            "creation_date": "1658337984559",
            "number_of_shards": "1",
            "number_of_replicas": "1",
            "uuid": "jU5KPBtXQHOaIn2Cm2d4jg",
            "version": {"created": "135238227"},
            "provided_name": "existing_index",
        }
        return {
            "aliases": {},
            "mappings": {"properties": {"content": {"type": "text"}, "embedding": embedding_mapping}},
            "settings": {"index": index_settings},
        }

    # Integration tests

    @pytest.mark.integration
    def test___init__(self):
        """
        Constructing a store with default engine settings must not raise.
        """
        init_kwargs = {"index": "nmslib_index", "create_index": True}
        OpenSearchDocumentStore(**init_kwargs)

    @pytest.mark.integration
    @pytest.mark.parametrize("index_type", ["flat", "hnsw", "ivf", "ivf_pq"])
    def test___init___faiss(self, index_type):
        """
        Constructing a store with the faiss engine must not raise for any parametrized index type.
        """
        faiss_index_name = f"faiss_index_{index_type}"
        OpenSearchDocumentStore(
            index=faiss_index_name,
            recreate_index=True,
            knn_engine="faiss",
            index_type=index_type,
        )

    @pytest.mark.integration
    def test___init___score_script(self):
        """
        Constructing a store with `knn_engine="score_script"` must not raise.
        """
        OpenSearchDocumentStore(
            index="score_script_index",
            create_index=True,
            knn_engine="score_script",
        )

    @pytest.mark.integration
    def test_recreate_index(self, ds, documents, labels):
        """
        A second store created with `recreate_index=True` on the same indices must start out empty.
        """
        ds.write_documents(documents)
        ds.write_labels(labels)

        # Point a brand new store at the indices that were just populated
        fresh_store = OpenSearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True)

        remaining_docs = fresh_store.get_all_documents(index=fresh_store.index)
        assert len(remaining_docs) == 0
        remaining_labels = fresh_store.get_all_labels(index=fresh_store.label_index)
        assert len(remaining_labels) == 0

    @pytest.mark.integration
    def test_clone_embedding_field(self, ds, documents):
        """
        After cloning the embedding field, the clone shows up in the meta of every document
        except those flagged with `no_embedding`.
        """
        cloned_field_name = "cloned"
        ds.write_documents(documents)
        ds.clone_embedding_field(cloned_field_name, "cosine")

        for stored_doc in ds.get_all_documents():
            doc_meta = stored_doc.to_dict()["meta"]
            # docs without an original embedding are skipped, all others get the new field
            expect_clone = "no_embedding" not in doc_meta
            assert (cloned_field_name in doc_meta) == expect_clone

    @pytest.mark.integration
    @pytest.mark.parametrize("knn_engine", ["nmslib", "faiss", "score_script"])
    def test_query_embedding_with_filters(self, ds: OpenSearchDocumentStore, documents, knn_engine):
        """
        An embedding query filtered on year "2020" returns three documents for every knn engine.
        """
        # Recreate the indices of the fixture store with the engine under test
        engine_store = OpenSearchDocumentStore(
            index=ds.index, label_index=ds.label_index, recreate_index=True, knn_engine=knn_engine
        )
        engine_store.write_documents(documents)

        query_vector = np.random.rand(768).astype(np.float32)
        hits = engine_store.query_by_embedding(query_emb=query_vector, filters={"year": "2020"}, top_k=10)
        assert len(hits) == 3

    @pytest.mark.integration
    @pytest.mark.parametrize("use_ann", [True, False])
    def test_query_embedding_batch_with_filters(self, ds: OpenSearchDocumentStore, documents, use_ann):
        """
        A batch of two filtered embedding queries yields two result lists of three documents each,
        whatever value `embeddings_field_supports_similarity` is forced to.
        """
        ds.embeddings_field_supports_similarity = use_ann
        ds.write_documents(documents)

        query_vectors = [np.random.rand(768).astype(np.float32) for _ in range(2)]
        year_filters = [{"year": "2020"} for _ in range(2)]
        per_query_hits = ds.query_by_embedding_batch(query_embs=query_vectors, filters=year_filters, top_k=10)

        assert len(per_query_hits) == 2
        for hits in per_query_hits:
            assert len(hits) == 3

    @pytest.mark.integration
    @pytest.mark.parametrize("index_type", ["ivf", "ivf_pq"])
    def test_train_index_from_documents(self, ds: OpenSearchDocumentStore, documents, index_type):
        """
        Training an IVF index from documents swaps the embedding mapping to a model reference
        and registers a model whose description carries the expected parameters.
        """
        # Recreate the indices of the fixture store as a faiss IVF index
        ivf_store = OpenSearchDocumentStore(
            index=ds.index,
            label_index=ds.label_index,
            recreate_index=True,
            knn_engine="faiss",
            index_type=index_type,
            knn_parameters={"code_size": 2},
        )

        def embedding_field_mapping():
            index_info = ivf_store.client.indices.get(ivf_store.index)
            return index_info[ivf_store.index]["mappings"]["properties"][ivf_store.embedding_field]

        # Before training the field is a plain knn_vector with neither a method nor a model attached
        assert embedding_field_mapping() == {"type": "knn_vector", "dimension": 768}

        ivf_store.train_index(documents)
        # After training the field must point at the trained model
        assert embedding_field_mapping() == {"type": "knn_vector", "model_id": f"{ivf_store.index}-ivf"}

        # The model description must carry the expected parameters
        expected_model_settings = {"index_type": index_type, "nlist": 4, "nprobes": 1}
        if index_type == "ivf_pq":
            expected_model_settings.update(code_size=2, m=1)

        response = ivf_store.client.transport.perform_request(
            "GET", url=f"/_plugins/_knn/models/{ivf_store.index}-ivf"
        )
        model_settings = {}
        for setting in response["description"].split():
            key, value = setting.split(":")
            model_settings[key] = int(value) if value.isnumeric() else value
        assert model_settings == expected_model_settings

    @pytest.mark.integration
    @pytest.mark.parametrize("index_type", ["ivf", "ivf_pq"])
    def test_train_index_from_embeddings(self, ds: OpenSearchDocumentStore, documents, index_type):
        """
        Training an IVF index from a raw embeddings array swaps the embedding mapping to a model
        reference and registers a model whose description carries the expected parameters.
        """
        # Recreate the indices of the fixture store as a faiss IVF index
        ivf_store = OpenSearchDocumentStore(
            index=ds.index,
            label_index=ds.label_index,
            recreate_index=True,
            knn_engine="faiss",
            index_type=index_type,
            knn_parameters={"code_size": 2},
        )

        def embedding_field_mapping():
            index_info = ivf_store.client.indices.get(ivf_store.index)
            return index_info[ivf_store.index]["mappings"]["properties"][ivf_store.embedding_field]

        # Before training the field is a plain knn_vector with neither a method nor a model attached
        assert embedding_field_mapping() == {"type": "knn_vector", "dimension": 768}

        # Only documents that actually carry an embedding contribute to the training data
        training_vectors = np.array([doc.embedding for doc in documents if doc.embedding is not None])
        ivf_store.train_index(embeddings=training_vectors)
        # After training the field must point at the trained model
        assert embedding_field_mapping() == {"type": "knn_vector", "model_id": f"{ivf_store.index}-ivf"}

        # The model description must carry the expected parameters
        expected_model_settings = {"index_type": index_type, "nlist": 4, "nprobes": 1}
        if index_type == "ivf_pq":
            expected_model_settings.update(code_size=2, m=1)

        response = ivf_store.client.transport.perform_request(
            "GET", url=f"/_plugins/_knn/models/{ivf_store.index}-ivf"
        )
        model_settings = {}
        for setting in response["description"].split():
            key, value = setting.split(":")
            model_settings[key] = int(value) if value.isnumeric() else value
        assert model_settings == expected_model_settings

    @pytest.mark.integration
    @pytest.mark.parametrize("index_type", ["ivf", "ivf_pq"])
    def test_train_index_with_write_documents(self, ds: OpenSearchDocumentStore, documents, index_type):
        """
        With `ivf_train_size=6`, writing the documents is enough to end up with a trained model:
        no explicit `train_index` call is made in this test.
        """
        # Recreate the indices of the fixture store as a faiss IVF index
        ivf_store = OpenSearchDocumentStore(
            index=ds.index,
            label_index=ds.label_index,
            recreate_index=True,
            knn_engine="faiss",
            index_type=index_type,
            knn_parameters={"code_size": 2},
            ivf_train_size=6,
        )

        def embedding_field_mapping():
            index_info = ivf_store.client.indices.get(ivf_store.index)
            return index_info[ivf_store.index]["mappings"]["properties"][ivf_store.embedding_field]

        # Before training the field is a plain knn_vector with neither a method nor a model attached
        assert embedding_field_mapping() == {"type": "knn_vector", "dimension": 768}

        ivf_store.write_documents(documents)
        # After writing the field must point at the trained model
        assert embedding_field_mapping() == {"type": "knn_vector", "model_id": f"{ivf_store.index}-ivf"}

        # The model description must carry the expected parameters
        expected_model_settings = {"index_type": index_type, "nlist": 4, "nprobes": 1}
        if index_type == "ivf_pq":
            expected_model_settings.update(code_size=2, m=1)

        response = ivf_store.client.transport.perform_request(
            "GET", url=f"/_plugins/_knn/models/{ivf_store.index}-ivf"
        )
        model_settings = {}
        for setting in response["description"].split():
            key, value = setting.split(":")
            model_settings[key] = int(value) if value.isnumeric() else value
        assert model_settings == expected_model_settings

    # Unit tests

    @pytest.mark.unit
    def test___init___api_key_raises_warning(self, mocked_document_store, caplog):
        """
        Passing `api_key`, `api_key_id` or both to `__init__` logs exactly one warning per call.
        """
        api_key_variants = (
            {"api_key": "foo"},
            {"api_key_id": "bar"},
            {"api_key": "foo", "api_key_id": "bar"},
        )
        with caplog.at_level(logging.WARN, logger="haystack.document_stores.opensearch"):
            for init_kwargs in api_key_variants:
                mocked_document_store.__init__(**init_kwargs)

        assert len(caplog.records) == 3
        for record in caplog.records:
            assert record.levelname == "WARNING"

    @pytest.mark.unit
    def test__init_client_aws4auth_and_username_raises_warning(self, mocked_open_search_init, caplog):
        """
        `_init_client` warns when both `username` and `aws4auth` are given and stays silent
        when only one of the two is set.
        """
        logger_name = "haystack.document_stores.opensearch"
        common_kwargs = {
            "host": "host",
            "port": 443,
            "password": "pass",
            "scheme": "https",
            "ca_certs": None,
            "verify_certs": True,
            "timeout": 10,
            "use_system_proxy": False,
        }

        # Both credentials supplied: every call must log a warning
        with caplog.at_level(logging.WARN, logger=logger_name):
            for username in ("admin", "bar"):
                OpenSearchDocumentStore._init_client(username=username, aws4auth="foo", **common_kwargs)
        assert len(caplog.records) == 2
        for record in caplog.records:
            assert record.levelname == "WARNING"

        # Only one of the two supplied: nothing must be logged
        caplog.clear()
        with caplog.at_level(logging.WARN, logger=logger_name):
            for username, aws4auth in ((None, "foo"), ("foo", None)):
                OpenSearchDocumentStore._init_client(username=username, aws4auth=aws4auth, **common_kwargs)
        assert len(caplog.records) == 0

    @pytest.mark.unit
    def test___init___connection_test_fails(self, mocked_document_store):
        """
        `__init__` raises ConnectionError when the client's `indices.get` call blows up.
        """
        broken_client = MagicMock(**{"indices.get.side_effect": Exception("The client failed!")})
        mocked_document_store._init_client.return_value = broken_client

        with pytest.raises(ConnectionError):
            mocked_document_store.__init__()

    @pytest.mark.unit
    def test___init___client_params(self, mocked_open_search_init, _init_client_params):
        """
        `_init_client` must forward the expected keyword arguments to the opensearch-py client.
        """
        expected_kwargs = {
            "hosts": [{"host": "localhost", "port": 9999}],
            "http_auth": ("user", "pass"),
            "scheme": "http",
            "ca_certs": "ca_certs",
            "verify_certs": True,
            "timeout": 42,
            "connection_class": RequestsHttpConnection,
        }

        OpenSearchDocumentStore._init_client(**_init_client_params)

        assert mocked_open_search_init.called
        assert mocked_open_search_init.call_args.kwargs == expected_kwargs

    @pytest.mark.unit
    def test__init_client_use_system_proxy_use_sys_proxy(self, mocked_open_search_init, _init_client_params):
        """
        With `use_system_proxy=False` the client is built with Urllib3HttpConnection.

        NOTE: despite the `use_sys_proxy` suffix in its name, this test covers the case where
        the system proxy is NOT used.
        """
        _init_client_params.update(use_system_proxy=False)
        OpenSearchDocumentStore._init_client(**_init_client_params)

        client_kwargs = mocked_open_search_init.call_args.kwargs
        assert client_kwargs["connection_class"] == Urllib3HttpConnection

    @pytest.mark.unit
    def test__init_client_use_system_proxy_dont_use_sys_proxy(self, mocked_open_search_init, _init_client_params):
        """
        With `use_system_proxy=True` the client is built with RequestsHttpConnection.

        NOTE: despite the `dont_use_sys_proxy` suffix in its name, this test covers the case where
        the system proxy IS used.
        """
        _init_client_params.update(use_system_proxy=True)
        OpenSearchDocumentStore._init_client(**_init_client_params)

        client_kwargs = mocked_open_search_init.call_args.kwargs
        assert client_kwargs["connection_class"] == RequestsHttpConnection

    @pytest.mark.unit
    def test__init_client_auth_methods_username_password(self, mocked_open_search_init, _init_client_params):
        """
        A username without aws4auth results in `http_auth` being the (username, password) tuple.
        """
        _init_client_params.update(username="user", aws4auth=None)
        OpenSearchDocumentStore._init_client(**_init_client_params)

        client_kwargs = mocked_open_search_init.call_args.kwargs
        assert client_kwargs["http_auth"] == ("user", "pass")

    @pytest.mark.unit
    def test__init_client_auth_methods_aws_iam(self, mocked_open_search_init, _init_client_params):
        """
        An empty username combined with aws4auth results in `http_auth` being the aws4auth object.
        """
        _init_client_params.update(username="", aws4auth="foo")
        OpenSearchDocumentStore._init_client(**_init_client_params)

        client_kwargs = mocked_open_search_init.call_args.kwargs
        assert client_kwargs["http_auth"] == "foo"

    @pytest.mark.unit
    def test__init_client_auth_methods_no_auth(self, mocked_open_search_init, _init_client_params):
        """
        With neither a username nor aws4auth, no `http_auth` argument reaches the client.
        """
        _init_client_params.update(username="", aws4auth=None)
        OpenSearchDocumentStore._init_client(**_init_client_params)

        client_kwargs = mocked_open_search_init.call_args.kwargs
        assert "http_auth" not in client_kwargs

    @pytest.mark.unit
    def test_query(self, mocked_document_store):
        """
        `query` must call the client's `search` with `index`, `body` and `headers` keyword arguments.
        """
        mocked_document_store.query(query=self.query)

        _, search_kwargs = mocked_document_store.client.search.call_args
        for expected_key in ("index", "body", "headers"):
            assert expected_key in search_kwargs

    @pytest.mark.unit
    def test_query_return_embedding_false(self, mocked_document_store):
        """
        With `return_embedding=False` the search body must exclude the `embedding` field.
        """
        mocked_document_store.return_embedding = False
        mocked_document_store.query(self.query)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        assert search_body["_source"] == {"excludes": ["embedding"]}

    @pytest.mark.unit
    def test_query_excluded_meta_data_return_embedding_true(self, mocked_document_store):
        """
        With `return_embedding=True`, "embedding" must not be excluded even though it is listed
        in `excluded_meta_data`.
        """
        mocked_document_store.excluded_meta_data = ["foo", "embedding"]
        mocked_document_store.return_embedding = True
        mocked_document_store.query(self.query)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        # only "foo" is left in the excludes, "embedding" was dropped
        assert search_body["_source"] == {"excludes": ["foo"]}

    @pytest.mark.unit
    def test_query_excluded_meta_data_return_embedding_false(self, mocked_document_store):
        """
        With `return_embedding=False`, "embedding" is appended to the fields already listed
        in `excluded_meta_data`.
        """
        mocked_document_store.excluded_meta_data = ["foo"]
        mocked_document_store.return_embedding = False
        mocked_document_store.query(self.query)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        assert search_body["_source"] == {"excludes": ["foo", "embedding"]}

    @pytest.mark.unit
    def test_query_by_embedding_raises_if_missing_field(self, mocked_document_store):
        """
        `query_by_embedding` raises DocumentStoreError when `embedding_field` is an empty string.
        """
        mocked_document_store.embedding_field = ""

        expected_failure = pytest.raises(DocumentStoreError)
        with expected_failure:
            mocked_document_store.query_by_embedding(self.query_emb)

    @pytest.mark.unit
    def test_query_by_embedding_raises_if_ivf_untrained(self, mocked_document_store):
        """
        `query_by_embedding` raises DocumentStoreError for an "ivf" index with `ivf_train_size=10`
        on which no training has been run in this test.
        """
        mocked_document_store.ivf_train_size = 10
        mocked_document_store.index_type = "ivf"

        expected_message = "Index of type 'ivf' is not trained yet."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store.query_by_embedding(self.query_emb)

    @pytest.mark.unit
    def test_query_by_embedding_batch_if_ivf_untrained(self, mocked_document_store):
        """
        `query_by_embedding_batch` raises DocumentStoreError for an "ivf" index with
        `ivf_train_size=10` on which no training has been run in this test.
        """
        mocked_document_store.ivf_train_size = 10
        mocked_document_store.index_type = "ivf"

        expected_message = "Index of type 'ivf' is not trained yet."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store.query_by_embedding_batch([self.query_emb])

    @pytest.mark.unit
    def test_query_by_embedding_filters(self, mocked_document_store):
        """
        Filters passed to `query_by_embedding` end up as term/range clauses under
        `query.bool.filter` of the search body when the engine is not "score_script".
        """
        assert mocked_document_store.knn_engine != "score_script"

        expected_filters = {"type": "article", "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}}
        expected_clauses = [
            {"term": {"type": "article"}},
            {"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}},
        ]
        mocked_document_store.query_by_embedding(self.query_emb, filters=expected_filters)

        # Inspect the body handed to the client's `search` method
        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        actual_filters = search_body["query"]["bool"]["filter"]
        assert actual_filters["bool"]["must"] == expected_clauses

    @pytest.mark.unit
    def test_query_by_embedding_script_score_filters(self, mocked_document_store):
        """
        With the "score_script" engine, filters end up as term/range clauses under
        `query.script_score.query.bool.filter` of the search body.
        """
        mocked_document_store.knn_engine = "score_script"

        expected_filters = {"type": "article", "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}}
        expected_clauses = [
            {"term": {"type": "article"}},
            {"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}},
        ]
        mocked_document_store.query_by_embedding(self.query_emb, filters=expected_filters)

        # Inspect the body handed to the client's `search` method
        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        actual_filters = search_body["query"]["script_score"]["query"]["bool"]["filter"]
        assert actual_filters["bool"]["must"] == expected_clauses

    @pytest.mark.unit
    def test_query_by_embedding_return_embedding_false(self, mocked_document_store):
        """
        With `return_embedding=False` the embedding query body must exclude the `embedding` field.
        """
        mocked_document_store.return_embedding = False
        mocked_document_store.query_by_embedding(self.query_emb)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        assert search_body["_source"] == {"excludes": ["embedding"]}

    @pytest.mark.unit
    def test_query_by_embedding_excluded_meta_data_return_embedding_true(self, mocked_document_store):
        """
        When `return_embedding` is True the embedding field must NOT be excluded, even if
        `excluded_meta_data` lists it.
        """
        mocked_document_store.excluded_meta_data = ["foo", "embedding"]
        mocked_document_store.return_embedding = True
        mocked_document_store.query_by_embedding(self.query_emb)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        # only "foo" is left in the excludes, "embedding" was dropped
        assert search_body["_source"] == {"excludes": ["foo"]}

    @pytest.mark.unit
    def test_query_by_embedding_excluded_meta_data_return_embedding_false(self, mocked_document_store):
        """
        When `return_embedding` is False the final query excludes the `embedding` field even
        though `excluded_meta_data` does not list it explicitly.
        """
        mocked_document_store.excluded_meta_data = ["foo"]
        mocked_document_store.return_embedding = False
        mocked_document_store.query_by_embedding(self.query_emb)

        search_body = mocked_document_store.client.search.call_args.kwargs["body"]
        assert search_body["_source"] == {"excludes": ["foo", "embedding"]}

    @pytest.mark.unit
    def test_query_by_embedding_batch_uses_msearch(self, mocked_document_store):
        """
        A batch of ten embedding queries goes through the client's `msearch` with a 20-item body.
        """
        query_batch = [self.query_emb for _ in range(10)]
        mocked_document_store.query_by_embedding_batch(query_batch)

        # Inspect the body handed to the client's `msearch` method
        msearch_body = mocked_document_store.client.msearch.call_args.kwargs["body"]
        assert len(msearch_body) == 20  # each search has headers and request

    @pytest.mark.unit
    def test__init_indices_with_alias(self, mocked_document_store, caplog):
        """
        `_init_indices` logs that the index name is an alias when the client reports one.
        """
        mocked_document_store.client.indices.exists_alias.return_value = True
        expected_log = f"Index name {self.index_name} is an alias."

        with caplog.at_level(logging.DEBUG, logger="haystack.document_stores.search_engine"):
            mocked_document_store._init_indices(self.index_name, "labels", False, False)

        assert expected_log in caplog.text

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_wrong_mapping_raises(self, mocked_document_store, existing_index):
        """
        Validation must raise when a field listed in `search_fields` is not mapped as text.
        """
        mapped_properties = existing_index["mappings"]["properties"]
        mapped_properties["age"] = {"type": "integer"}
        mocked_document_store.search_fields = ["age"]

        expected_message = f"The index '{self.index_name}' needs the 'text' type for the search_field 'age' to run full text search, but got type 'integer'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_create_embedding_mapping_if_missing(self, mocked_document_store):
        """
        An embedding field with no mapping gets a `knn_vector` mapping through `put_mapping`.
        """
        unmapped_field = "doesnt_have_a_mapping"
        mocked_document_store.embedding_field = unmapped_field

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # Inspect what was handed to the client's `put_mapping`
        mapping_kwargs = mocked_document_store.client.indices.put_mapping.call_args.kwargs
        assert mapping_kwargs["index"] == self.index_name
        assert mapping_kwargs["body"]["properties"][unmapped_field]["type"] == "knn_vector"

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_create_search_field_mapping_if_missing(self, mocked_document_store):
        """
        A search field with no mapping gets a `text` mapping through `put_mapping`.
        """
        unmapped_field = "doesnt_have_a_mapping"
        mocked_document_store.search_fields = [unmapped_field]

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # Inspect what was handed to the client's `put_mapping`
        mapping_kwargs = mocked_document_store.client.indices.put_mapping.call_args.kwargs
        assert mapping_kwargs["index"] == self.index_name
        assert mapping_kwargs["body"]["properties"][unmapped_field]["type"] == "text"

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_with_bad_field_raises(self, mocked_document_store, existing_index):
        """
        Validation must raise when `embedding_field` points at a field that is not a `knn_vector`.
        """
        mapped_properties = existing_index["mappings"]["properties"]
        mapped_properties["age"] = {"type": "integer"}
        mocked_document_store.embedding_field = "age"

        expected_message = f"The index '{self.index_name}' needs the 'knn_vector' type for the embedding_field 'age' to run vector search, but got type 'integer'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_but_no_method(self, mocked_document_store, existing_index):
        """
        The embedding field is mapped properly but its mapping carries no `method` entry:
        validation raises for space type "innerproduct" and passes for "l2".
        """
        embedding_mapping = existing_index["mappings"]["properties"]["embedding"]
        del embedding_mapping["method"]

        assert mocked_document_store.space_type == "innerproduct"
        expected_message = rf"Set `similarity` to one of '\['l2'\]' to properly use the embedding field 'embedding' of index '{self.index_name}'. Similarity 'dot_product' is not compatible with embedding field's space type 'l2', it requires 'innerproduct'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # l2 is the default space_type, so validation must go through with it
        mocked_document_store.space_type = "l2"
        mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_similarity(self, mocked_document_store):
        """
        Validation must not raise when the store's space type is "innerproduct".
        """
        setattr(mocked_document_store, "space_type", "innerproduct")
        index_under_test = self.index_name
        mocked_document_store._validate_and_adjust_document_index(index_under_test)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_similarity_mismatch(self, mocked_document_store):
        """
        Validation must raise when the store's space type is set to "cosinesimil".
        """
        mocked_document_store.space_type = "cosinesimil"

        expected_message = rf"Set `similarity` to one of '\['dot_product'\]' to properly use the embedding field 'embedding' of index '{self.index_name}'. Similarity 'dot_product' is not compatible with embedding field's space type 'innerproduct', it requires 'cosinesimil'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_type_mismatch(self, mocked_document_store):
        """
        Validation must raise when `index_type` is set to "hnsw", reporting the `ef_construction` mismatch.
        """
        mocked_document_store.index_type = "hnsw"

        expected_message = f"The index_type 'hnsw' needs '80' as ef_construction value. Currently, the value for embedding field 'embedding' of index '{self.index_name}' is '512'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_change_knn_engine_to_faiss(self, mocked_document_store):
        """
        Validation must raise when `knn_engine` is switched to "faiss", reporting the engine mismatch.
        """
        mocked_document_store.knn_engine = "faiss"

        expected_message = f"Existing embedding field '{mocked_document_store.embedding_field}' of OpenSearch index '{self.index_name}' has knn_engine 'nmslib', but knn_engine was set to 'faiss'."
        with pytest.raises(DocumentStoreError, match=expected_message):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_change_knn_engine_to_score_script(self, mocked_document_store):
        """
        Validation must not raise when the store is set to the "score_script" engine with
        space type "cosinesimil".
        """
        overrides = {"knn_engine": "score_script", "space_type": "cosinesimil"}
        for attribute, value in overrides.items():
            setattr(mocked_document_store, attribute, value)

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_adjusts_ef_search_for_hnsw_when_default(
        self, mocked_document_store, existing_index
    ):
        """
        When `knn.algo_param` is absent from the index settings, `ef_search` is pushed
        through `put_settings` with the value 20.
        """
        method_parameters = existing_index["mappings"]["properties"]["embedding"]["method"]["parameters"]
        method_parameters["ef_construction"] = 80
        method_parameters["m"] = 64
        mocked_document_store.index_type = "hnsw"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # the settings update must carry the adjusted value
        settings_kwargs = mocked_document_store.client.indices.put_settings.call_args.kwargs
        assert settings_kwargs["body"] == {"knn.algo_param.ef_search": 20}

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_adjusts_ef_search_for_hnsw_when_set_different(
        self, mocked_document_store, existing_index
    ):
        """
        When the index settings carry `ef_search` 999, the value is pushed through
        `put_settings` as 20.
        """
        method_parameters = existing_index["mappings"]["properties"]["embedding"]["method"]["parameters"]
        method_parameters["ef_construction"] = 80
        method_parameters["m"] = 64
        existing_index["settings"]["index"]["knn.algo_param"] = {"ef_search": 999}
        mocked_document_store.index_type = "hnsw"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # the settings update must contain the adjusted value
        settings_kwargs = mocked_document_store.client.indices.put_settings.call_args.kwargs
        assert settings_kwargs["body"] == {"knn.algo_param.ef_search": 20}

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_ignores_index_setting_ef_search_for_faiss(
        self, mocked_document_store, existing_index
    ):
        """
        With the faiss engine, an `ef_search` value in the index settings must not trigger
        a `put_settings` call.
        """
        embedding_method = existing_index["mappings"]["properties"]["embedding"]["method"]
        embedding_method["engine"] = "faiss"
        embedding_method["parameters"]["ef_construction"] = 512
        embedding_method["parameters"]["m"] = 16
        existing_index["settings"]["index"]["knn.algo_param"] = {"ef_search": 999}
        mocked_document_store.knn_engine = "faiss"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        put_settings_mock = mocked_document_store.client.indices.put_settings
        put_settings_mock.assert_not_called()

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_ignores_parameter_ef_search_for_nmslib(
        self, mocked_document_store, existing_index
    ):
        """
        An `ef_search` entry of 999 among the embedding method parameters must not lead
        to a `put_settings` call while the index-level `knn.algo_param` holds `ef_search` 512.
        """
        method_params = existing_index["mappings"]["properties"]["embedding"]["method"]["parameters"]
        method_params["ef_construction"] = 512
        method_params["m"] = 16
        method_params["ef_search"] = 999
        existing_index["settings"]["index"]["knn.algo_param"] = {"ef_search": 512}

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        put_settings = mocked_document_store.client.indices.put_settings
        put_settings.assert_not_called()

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_does_not_adjust_ef_search_for_hnsw_when_set_correct(
        self, mocked_document_store, existing_index
    ):
        """
        An `hnsw` index that already carries `ef_search` 20 in its settings must be left
        untouched: no `put_settings` call is expected.
        """
        hnsw_params = existing_index["mappings"]["properties"]["embedding"]["method"]["parameters"]
        hnsw_params["ef_construction"] = 80
        hnsw_params["m"] = 64
        index_settings = existing_index["settings"]["index"]
        index_settings["knn.algo_param"] = {"ef_search": 20}
        mocked_document_store.index_type = "hnsw"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        put_settings = mocked_document_store.client.indices.put_settings
        put_settings.assert_not_called()

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_adjusts_ef_search_for_flat_when_set_different(
        self, mocked_document_store, existing_index
    ):
        """
        A `flat` index whose `knn.algo_param` setting carries `ef_search` 999 must be
        adjusted: validation is expected to send a `put_settings` body with `ef_search` 512.
        """
        index_settings = existing_index["settings"]["index"]
        index_settings["knn.algo_param"] = {"ef_search": 999}
        mocked_document_store.index_type = "flat"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        # the body sent to the cluster must contain the adjusted value
        put_settings_kwargs = mocked_document_store.client.indices.put_settings.call_args[1]
        assert put_settings_kwargs["body"] == {"knn.algo_param.ef_search": 512}

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_does_not_adjust_ef_search_for_flat_when_default(
        self, mocked_document_store
    ):
        """
        A `flat` index validated without this test setting `knn.algo_param` must not
        lead to any `put_settings` call.
        """
        mocked_document_store.index_type = "flat"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        put_settings = mocked_document_store.client.indices.put_settings
        put_settings.assert_not_called()

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_does_not_adjust_ef_search_for_flat_when_set_correct(
        self, mocked_document_store, existing_index
    ):
        """
        A `flat` index that already carries `ef_search` 512 in its settings must be left
        untouched: no `put_settings` call is expected.
        """
        index_settings = existing_index["settings"]["index"]
        index_settings["knn.algo_param"] = {"ef_search": 512}
        mocked_document_store.index_type = "flat"

        mocked_document_store._validate_and_adjust_document_index(self.index_name)

        put_settings = mocked_document_store.client.indices.put_settings
        put_settings.assert_not_called()

    @pytest.mark.unit
    def test__validate_and_adjust_document_index_with_non_existing_index(self, mocked_document_store, caplog):
        """
        When the client returns no index data at all, validation must log that the
        index doesn't exist (captured at WARNING level or above).
        """
        expected_message = f"The index '{self.index_name}' doesn't exist. "
        mocked_document_store.client.indices.get.return_value = {}

        with caplog.at_level(logging.WARNING):
            mocked_document_store._validate_and_adjust_document_index(self.index_name)
            assert expected_message in caplog.text

    @pytest.mark.unit
    @pytest.mark.parametrize("create_index", [True, False])
    @pytest.mark.parametrize("recreate_index", [True, False])
    def test__init_indices_always_calls_validation_if_no_custom_mapping(
        self, mocked_document_store, create_index, recreate_index
    ):
        """
        With the fixture's configuration left as is (this test sets no custom mapping),
        `_init_indices` must call the index validation exactly once for every
        parametrized combination of `create_index` and `recreate_index`.
        """
        validation_mock = MagicMock()
        mocked_document_store._validate_and_adjust_document_index = validation_mock

        mocked_document_store._init_indices(self.index_name, "label_index", create_index, recreate_index)

        validation_mock.assert_called_once()

    @pytest.mark.unit
    @pytest.mark.parametrize("create_index", [True, False])
    @pytest.mark.parametrize("recreate_index", [True, False])
    def test__init_indices_never_calls_validation_if_custom_mapping(
        self, mocked_document_store, create_index, recreate_index, caplog
    ):
        """
        Once a custom mapping is set, `_init_indices` must log that validation is skipped
        and must never call the index validation, for every parametrized combination
        of `create_index` and `recreate_index`.
        """
        embedding_properties = {"embedding": {"type": "dense_vector", "dims": 768}}
        mocked_document_store.custom_mapping = {"mappings": {"properties": embedding_properties}}
        validation_mock = MagicMock()
        mocked_document_store._validate_and_adjust_document_index = validation_mock

        with caplog.at_level(logging.WARNING):
            mocked_document_store._init_indices(self.index_name, "label_index", create_index, recreate_index)
            assert "Skipping index validation" in caplog.text
            validation_mock.assert_not_called()

    @pytest.mark.unit
    def test__init_indices_creates_index_if_not_exists(self, mocked_document_store):
        """
        If the client reports that no index exists and `create_index` is True,
        `_init_indices` must call `indices.create`.
        """
        indices = mocked_document_store.client.indices
        indices.exists.return_value = False

        mocked_document_store._init_indices(self.index_name, "label_index", create_index=True, recreate_index=False)

        indices.create.assert_called()

    @pytest.mark.unit
    def test__init_indices_does_not_create_index_if_exists(self, mocked_document_store):
        """
        With the fixture's `indices.exists` answer left as is, `_init_indices` must not
        call `indices.create` even though `create_index` is True.
        """
        indices = mocked_document_store.client.indices

        mocked_document_store._init_indices(self.index_name, "label_index", create_index=True, recreate_index=False)

        indices.create.assert_not_called()

    @pytest.mark.unit
    def test__init_indices_does_not_create_index_if_not_create_index(self, mocked_document_store):
        """
        If the client reports that no index exists but `create_index` is False,
        `_init_indices` must not call `indices.create`.
        """
        indices = mocked_document_store.client.indices
        indices.exists.return_value = False

        mocked_document_store._init_indices(self.index_name, "label_index", create_index=False, recreate_index=False)

        indices.create.assert_not_called()

    @pytest.mark.unit
    def test__init_indices_creates_index_if_exists_and_recreate_index(self, mocked_document_store):
        """
        With `recreate_index` set, `_init_indices` must call both `indices.delete` and
        `indices.create`, given the scripted sequence of `indices.exists` answers below.
        """
        indices = mocked_document_store.client.indices
        # Scripted answers for `indices.exists`:
        # - delete_index asks four times: one check for doc index, one check for label index
        #   + one check for both if ivf model exists
        # - create_index asks two times: one for doc index, one for label index
        exists_answers = [True, False, True, False, False, False]
        indices.exists.side_effect = exists_answers

        mocked_document_store._init_indices(self.index_name, "label_index", create_index=True, recreate_index=True)

        indices.delete.assert_called()
        indices.create.assert_called()

    @pytest.mark.unit
    def test__create_document_index_no_index_custom_mapping(self, mocked_document_store):
        """
        When a custom mapping is set, `_create_document_index` must pass it as the body
        of `indices.create`, and `knn_engine` / `space_type` must read "nmslib" /
        "innerproduct" afterwards.
        """
        mocked_document_store.custom_mapping = {"mappings": {"properties": {"a_number": {"type": "integer"}}}}
        # Deliberately a separate literal: an in-place mutation of the custom mapping would be caught
        expected_body = {"mappings": {"properties": {"a_number": {"type": "integer"}}}}

        mocked_document_store._create_document_index(self.index_name)

        create_kwargs = mocked_document_store.client.indices.create.call_args[1]
        assert create_kwargs["body"] == expected_body
        assert mocked_document_store.knn_engine == "nmslib"
        assert mocked_document_store.space_type == "innerproduct"

    @pytest.mark.unit
    def test__create_document_index_no_index_no_mapping(self, mocked_document_store):
        """
        With the fixture's configuration left as is, `_create_document_index` must send
        the mappings and settings spelled out below as the body of `indices.create`.
        """
        expected_embedding = {
            "type": "knn_vector",
            "dimension": 768,
            "method": {
                "space_type": "innerproduct",
                "name": "hnsw",
                "engine": "nmslib",
                "parameters": {"ef_construction": 512, "m": 16},
            },
        }
        expected_mappings = {
            "properties": {
                "name": {"type": "keyword"},
                "content": {"type": "text"},
                "embedding": expected_embedding,
            },
            "dynamic_templates": [
                {"strings": {"path_match": "*", "match_mapping_type": "string", "mapping": {"type": "keyword"}}}
            ],
        }
        expected_settings = {"analysis": {"analyzer": {"default": {"type": "standard"}}}, "index": {"knn": True}}

        mocked_document_store._create_document_index(self.index_name)

        create_kwargs = mocked_document_store.client.indices.create.call_args[1]
        assert create_kwargs["body"] == {"mappings": expected_mappings, "settings": expected_settings}
        assert mocked_document_store.knn_engine == "nmslib"
        assert mocked_document_store.space_type == "innerproduct"

    @pytest.mark.unit
    def test__create_document_index_no_index_no_mapping_with_synonyms(self, mocked_document_store):
        """
        With synonyms and an extra search field configured, the body sent to
        `indices.create` must use the `synonym` analyzer for the text fields and
        declare the matching analyzer and filter in the settings.
        """
        mocked_document_store.search_fields = ["occupation"]
        mocked_document_store.synonyms = ["foo"]

        synonym_text_field = {"type": "text", "analyzer": "synonym"}
        expected_mappings = {
            "dynamic_templates": [
                {"strings": {"path_match": "*", "match_mapping_type": "string", "mapping": {"type": "keyword"}}}
            ],
            "properties": {
                "name": {"type": "keyword"},
                "content": dict(synonym_text_field),
                "occupation": dict(synonym_text_field),
                "embedding": {
                    "type": "knn_vector",
                    "dimension": 768,
                    "method": {
                        "space_type": "innerproduct",
                        "name": "hnsw",
                        "engine": "nmslib",
                        "parameters": {"ef_construction": 512, "m": 16},
                    },
                },
            },
        }
        expected_settings = {
            "index": {"knn": True},
            "analysis": {
                "filter": {"synonym": {"type": "synonym", "synonyms": ["foo"]}},
                "analyzer": {
                    "default": {"type": "standard"},
                    "synonym": {"tokenizer": "whitespace", "filter": ["lowercase", "synonym"]},
                },
            },
        }

        mocked_document_store._create_document_index(self.index_name)

        create_kwargs = mocked_document_store.client.indices.create.call_args[1]
        assert create_kwargs["body"] == {"mappings": expected_mappings, "settings": expected_settings}
        assert mocked_document_store.knn_engine == "nmslib"
        assert mocked_document_store.space_type == "innerproduct"

    @pytest.mark.unit
    def test__create_document_index_no_index_no_mapping_with_embedding_field(self, mocked_document_store):
        """
        With a custom embedding field name and the `hnsw` index type, the body sent to
        `indices.create` must map the vector under that name and carry the
        `knn.algo_param.ef_search` index setting.
        """
        mocked_document_store.embedding_field = "vec"
        mocked_document_store.index_type = "hnsw"

        expected_vector_field = {
            "type": "knn_vector",
            "dimension": 768,
            "method": {
                "space_type": "innerproduct",
                "name": "hnsw",
                "engine": "nmslib",
                "parameters": {"ef_construction": 80, "m": 64},
            },
        }
        expected_mappings = {
            "dynamic_templates": [
                {"strings": {"path_match": "*", "match_mapping_type": "string", "mapping": {"type": "keyword"}}}
            ],
            "properties": {
                "name": {"type": "keyword"},
                "content": {"type": "text"},
                "vec": expected_vector_field,
            },
        }
        expected_settings = {
            "index": {"knn": True, "knn.algo_param.ef_search": 20},
            "analysis": {"analyzer": {"default": {"type": "standard"}}},
        }

        mocked_document_store._create_document_index(self.index_name)

        create_kwargs = mocked_document_store.client.indices.create.call_args[1]
        assert create_kwargs["body"] == {"mappings": expected_mappings, "settings": expected_settings}
        assert mocked_document_store.knn_engine == "nmslib"
        assert mocked_document_store.space_type == "innerproduct"

    @pytest.mark.unit
    def test__create_document_index_no_index_no_mapping_faiss(self, mocked_document_store):
        """
        With the `faiss` engine selected, the body sent to `indices.create` must name
        `faiss` as the engine of the embedding method.
        """
        mocked_document_store.knn_engine = "faiss"

        expected_embedding = {
            "type": "knn_vector",
            "dimension": 768,
            "method": {
                "space_type": "innerproduct",
                "name": "hnsw",
                "engine": "faiss",
                "parameters": {"ef_construction": 512, "m": 16},
            },
        }
        expected_mappings = {
            "properties": {
                "name": {"type": "keyword"},
                "content": {"type": "text"},
                "embedding": expected_embedding,
            },
            "dynamic_templates": [
                {"strings": {"path_match": "*", "match_mapping_type": "string", "mapping": {"type": "keyword"}}}
            ],
        }
        expected_settings = {"analysis": {"analyzer": {"default": {"type": "standard"}}}, "index": {"knn": True}}

        mocked_document_store._create_document_index(self.index_name)

        create_kwargs = mocked_document_store.client.indices.create.call_args[1]
        assert create_kwargs["body"] == {"mappings": expected_mappings, "settings": expected_settings}

    @pytest.mark.unit
    def test__create_document_index_client_failure(self, mocked_document_store):
        """
        A `RequestError` raised by `indices.create` must propagate out of
        `_create_document_index`.
        """
        indices = mocked_document_store.client.indices
        indices.exists.return_value = False
        indices.create.side_effect = RequestError

        with pytest.raises(RequestError):
            mocked_document_store._create_document_index(self.index_name)

    @pytest.mark.unit
    def test__get_embedding_field_mapping_flat(self, mocked_document_store):
        """
        Pin the embedding field mapping returned for the `flat` index type.
        """
        mocked_document_store.index_type = "flat"
        expected_method = {
            "engine": "nmslib",
            "name": "hnsw",
            "space_type": "innerproduct",
            "parameters": {"ef_construction": 512, "m": 16},
        }

        mapping = mocked_document_store._get_embedding_field_mapping()

        assert mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_default_hnsw(self, mocked_document_store):
        """
        Pin the embedding field mapping returned for the `hnsw` index type when the
        test sets no custom kNN parameters.
        """
        mocked_document_store.index_type = "hnsw"
        expected_method = {
            "engine": "nmslib",
            "name": "hnsw",
            "space_type": "innerproduct",
            "parameters": {"ef_construction": 80, "m": 64},
        }

        mapping = mocked_document_store._get_embedding_field_mapping()

        assert mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_default_hnsw_faiss(self, mocked_document_store):
        """
        Pin the embedding field mapping returned for the `hnsw` index type on the
        `faiss` engine; here `ef_search` is expected among the method parameters.
        """
        mocked_document_store.index_type = "hnsw"
        mocked_document_store.knn_engine = "faiss"
        expected_method = {
            "engine": "faiss",
            "name": "hnsw",
            "space_type": "innerproduct",
            "parameters": {"ef_construction": 80, "m": 64, "ef_search": 20},
        }

        mapping = mocked_document_store._get_embedding_field_mapping()

        assert mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_custom_hnsw(self, mocked_document_store):
        """
        Custom `knn_parameters` must show up as the method parameters of the `hnsw`
        embedding field mapping.
        """
        mocked_document_store.index_type = "hnsw"
        mocked_document_store.knn_parameters = {"ef_construction": 1, "m": 2}
        # Separate literal on purpose, so mutation of `knn_parameters` would be caught
        expected_method = {
            "name": "hnsw",
            "engine": "nmslib",
            "space_type": "innerproduct",
            "parameters": {"ef_construction": 1, "m": 2},
        }

        mapping = mocked_document_store._get_embedding_field_mapping()

        assert mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_custom_hnsw_faiss(self, mocked_document_store):
        """
        Custom `knn_parameters`, including `ef_search`, must show up as the method
        parameters of the `hnsw` embedding field mapping on the `faiss` engine.
        """
        mocked_document_store.index_type = "hnsw"
        mocked_document_store.knn_engine = "faiss"
        mocked_document_store.knn_parameters = {"ef_construction": 1, "m": 2, "ef_search": 3}
        # Separate literal on purpose, so mutation of `knn_parameters` would be caught
        expected_method = {
            "name": "hnsw",
            "engine": "faiss",
            "space_type": "innerproduct",
            "parameters": {"ef_construction": 1, "m": 2, "ef_search": 3},
        }

        mapping = mocked_document_store._get_embedding_field_mapping()

        assert mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_ivf(self, mocked_document_store):
        """
        Check the embedding field mapping of an `ivf` index on the `faiss` engine: first
        while `indices.exists` answers False, then once it answers True and the client
        returns a stored model document.
        """
        client = mocked_document_store.client
        mocked_document_store.index_type = "ivf"
        mocked_document_store.knn_engine = "faiss"

        # Before training, IVF indices use HNSW with default settings
        client.indices.exists.return_value = False
        untrained_mapping = mocked_document_store._get_embedding_field_mapping()
        assert untrained_mapping == {"type": "knn_vector", "dimension": 768}

        # Assume we have trained the index
        client.indices.exists.return_value = True
        model_hit = {
            "_index": ".opensearch-knn-models",
            "_type": "_doc",
            "_id": "document-ivf",
            "_score": 1.0,
            "_source": {
                "model_blob": "<SOME MODEL BLOB>",
                "engine": "faiss",
                "space_type": "innerproduct",
                "description": "index_type:ivf nlist:4 nprobes:1",
                "model_id": f"{mocked_document_store.index}-ivf",
                "state": "created",
                "error": "",
                "dimension": 768,
                "timestamp": "2023-01-25T16:04:21.284398Z",
            },
        }
        client.transport.perform_request.return_value = {
            "took": 4,
            "timed_out": False,
            "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
            "hits": {"total": {"value": 1, "relation": "eq"}, "max_score": 1.0, "hits": [model_hit]},
        }
        trained_mapping = mocked_document_store._get_embedding_field_mapping()
        assert trained_mapping == {"type": "knn_vector", "model_id": f"{mocked_document_store.index}-ivf"}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_ivfpq(self, mocked_document_store):
        """
        Check the embedding field mapping of an `ivf_pq` index on the `faiss` engine:
        first while `indices.exists` answers False, then once it answers True and the
        client returns a stored model document.
        """
        client = mocked_document_store.client
        mocked_document_store.index_type = "ivf_pq"
        mocked_document_store.knn_engine = "faiss"

        # Before training, IVF indices use HNSW with default settings
        client.indices.exists.return_value = False
        untrained_mapping = mocked_document_store._get_embedding_field_mapping()
        assert untrained_mapping == {"type": "knn_vector", "dimension": 768}

        # Assume we have trained the index
        client.indices.exists.return_value = True
        model_hit = {
            "_index": ".opensearch-knn-models",
            "_type": "_doc",
            "_id": "document-ivf",
            "_score": 1.0,
            "_source": {
                "model_blob": "<SOME MODEL BLOB>",
                "engine": "faiss",
                "space_type": "innerproduct",
                "description": "index_type:ivf_pq nlist:4 nprobes:1 m:1 code_size:8",
                "model_id": f"{mocked_document_store.index}-ivf",
                "state": "created",
                "error": "",
                "dimension": 768,
                "timestamp": "2023-01-25T16:04:21.284398Z",
            },
        }
        client.transport.perform_request.return_value = {
            "took": 4,
            "timed_out": False,
            "_shards": {"total": 1, "successful": 1, "skipped": 0, "failed": 0},
            "hits": {"total": {"value": 1, "relation": "eq"}, "max_score": 1.0, "hits": [model_hit]},
        }
        trained_mapping = mocked_document_store._get_embedding_field_mapping()
        assert trained_mapping == {"type": "knn_vector", "model_id": f"{mocked_document_store.index}-ivf"}

    @pytest.mark.unit
    def test__get_embedding_field_mapping_wrong(self, mocked_document_store, caplog):
        """
        An unknown index type must be reported in the log (captured at ERROR level) while
        a mapping without method parameters is still returned.
        """
        mocked_document_store.index_type = "foo"
        expected_log = "Set index_type to either 'flat', 'hnsw', 'ivf', or 'ivf_pq'"
        expected_mapping = {
            "type": "knn_vector",
            "dimension": 768,
            "method": {"space_type": "innerproduct", "name": "hnsw", "engine": "nmslib"},
        }

        with caplog.at_level(logging.ERROR, logger="haystack.document_stores.opensearch"):
            fallback_mapping = mocked_document_store._get_embedding_field_mapping()

        assert expected_log in caplog.text
        assert fallback_mapping == expected_mapping

    @pytest.mark.unit
    def test__create_label_index_already_exists(self, mocked_document_store):
        """
        When the client reports that indices already exist, `_init_indices` must not
        call `indices.create`.
        """
        indices = mocked_document_store.client.indices
        indices.exists.return_value = True

        mocked_document_store._init_indices("doc_index", "label_index", create_index=True, recreate_index=False)

        indices.create.assert_not_called()

    @pytest.mark.unit
    def test__create_label_index_client_error(self, mocked_document_store):
        """
        A `RequestError` raised by `indices.create` must propagate out of
        `_create_label_index`.
        """
        indices = mocked_document_store.client.indices
        indices.exists.return_value = False
        indices.create.side_effect = RequestError

        with pytest.raises(RequestError):
            mocked_document_store._create_label_index("foo")

    @pytest.mark.unit
    def test__get_vector_similarity_query_support_true(self, mocked_document_store):
        """
        When the kNN engine is not `score_script`, the similarity query must be a
        `knn` clause on the embedding field, wrapped in a `bool`/`must`.
        """
        mocked_document_store.embedding_field = "FooField"
        assert mocked_document_store.knn_engine != "score_script"

        query = mocked_document_store._get_vector_similarity_query(self.query_emb, 3)

        knn_clause = {"knn": {"FooField": {"vector": self.query_emb.tolist(), "k": 3}}}
        assert query == {"bool": {"must": [knn_clause]}}

    @pytest.mark.unit
    def test__get_vector_similarity_query_support_false(self, mocked_document_store):
        """
        With the `score_script` engine, the similarity query must be a `script_score`
        query running `knn_score` over a `match_all`.
        """
        mocked_document_store.embedding_field = "FooField"
        mocked_document_store.knn_engine = "score_script"
        mocked_document_store.space_type = "innerproduct"

        query = mocked_document_store._get_vector_similarity_query(self.query_emb, 3)

        expected_script = {
            "source": "knn_score",
            "lang": "knn",
            "params": {
                "field": "FooField",
                "query_value": self.query_emb.tolist(),
                "space_type": "innerproduct",
            },
        }
        assert query == {"script_score": {"query": {"match_all": {}}, "script": expected_script}}

    @pytest.mark.unit
    def test__get_raw_similarity_score_dot(self, mocked_document_store):
        """
        Pin the raw scores returned for the inputs 2 and -2 under `dot_product` similarity.
        """
        mocked_document_store.similarity = "dot_product"

        expected_by_score = {2: 1, -2: 1.5}
        for score, expected_raw in expected_by_score.items():
            assert mocked_document_store._get_raw_similarity_score(score) == expected_raw

    @pytest.mark.unit
    def test__get_raw_similarity_score_l2(self, mocked_document_store):
        """
        Pin the raw score returned for the input 1 under `l2` similarity.
        """
        mocked_document_store.similarity = "l2"

        raw_score = mocked_document_store._get_raw_similarity_score(1)

        assert raw_score == 0

    @pytest.mark.unit
    def test__get_raw_similarity_score_cosine(self, mocked_document_store):
        """
        Pin the raw score returned for the input 1 with the `cosinesimil` space type,
        first with the fixture's engine and then with `score_script`.
        """
        mocked_document_store.space_type = "cosinesimil"

        # The fixture's engine must not already be `score_script`
        assert mocked_document_store.knn_engine != "score_script"
        score_with_default_engine = mocked_document_store._get_raw_similarity_score(1)
        assert score_with_default_engine == 1

        mocked_document_store.knn_engine = "score_script"
        score_with_score_script = mocked_document_store._get_raw_similarity_score(1)
        assert score_with_score_script == 0

    @pytest.mark.unit
    def test_clone_embedding_field_duplicate_mapping(self, mocked_document_store):
        """
        Cloning into a field name that the index mapping already contains must raise.
        """
        expected_error = "embedding already exists with mapping"
        mocked_document_store.index = self.index_name

        with pytest.raises(Exception, match=expected_error):
            mocked_document_store.clone_embedding_field("embedding", "cosine")

    @pytest.mark.unit
    def test_clone_embedding_field_update_mapping(self, mocked_document_store, monkeypatch):
        """
        Cloning into a new field must call `put_mapping` with a kNN vector mapping for
        that field that uses the `cosinesimil` space type.
        """
        mocked_document_store.index = self.index_name

        # Mock away tqdm and the batch logic so we can test the mapping update alone
        mocked_document_store._get_all_documents_in_index = MagicMock(return_value=[])
        monkeypatch.setattr(tqdm, "__new__", MagicMock())

        mocked_document_store.clone_embedding_field("a_field", "cosine")

        put_mapping_kwargs = mocked_document_store.client.indices.put_mapping.call_args[1]
        new_field_mapping = put_mapping_kwargs["body"]["properties"]["a_field"]
        expected_method = {
            "engine": "nmslib",
            "name": "hnsw",
            "space_type": "cosinesimil",
            "parameters": {"ef_construction": 512, "m": 16},
        }
        assert new_field_mapping == {"type": "knn_vector", "dimension": 768, "method": expected_method}

    @pytest.mark.unit
    def test_bulk_write_retries_for_always_failing_insert_is_canceled(self, mocked_document_store, monkeypatch, caplog):
        """
        When every `bulk` call fails with a 429 `TransportError`, `_bulk` must give up
        with a `DocumentStoreError` after three calls and log the failed attempts.
        """
        docs_to_write = []
        for doc_number in range(1000):
            docs_to_write.append(
                {
                    "meta": {"name": f"name_{doc_number}"},
                    "content": f"text_{doc_number}",
                    "embedding": np.random.rand(768).astype(np.float32),
                }
            )

        with patch("haystack.document_stores.opensearch.bulk") as bulk_mock:
            bulk_mock.side_effect = opensearchpy.TransportError(429, "Too many requests")

            with pytest.raises(DocumentStoreError, match="Last try of bulk indexing documents failed."):
                mocked_document_store._bulk(documents=docs_to_write, _timeout=0, _remaining_tries=3)

            # depth first search fails and cancels the whole bulk request
            assert bulk_mock.call_count == 3

            captured_log = caplog.text
            assert "Too Many Requests" in captured_log
            assert " Splitting the number of documents into two chunks with the same size" in captured_log

    @pytest.mark.unit
    def test_bulk_write_retries_with_backoff_with_smaller_batch_size_on_too_many_requests(
        self, mocked_document_store, monkeypatch
    ):
        """
        When the first two `bulk` calls fail with a 429 `TransportError` and the
        following ones succeed, `_bulk` must finish after five `bulk` calls in total.
        """
        docs_to_write = []
        for doc_number in range(1000):
            docs_to_write.append(
                {
                    "meta": {"name": f"name_{doc_number}"},
                    "content": f"text_{doc_number}",
                    "embedding": np.random.rand(768).astype(np.float32),
                }
            )

        # make bulk insert split documents and request retries s.t.
        # 1k => 500 (failed) + 500 (successful) => 250 (successful) + 250 (successful)
        # resulting in 5 calls in total
        failures = [opensearchpy.TransportError(429, "Too many requests") for _ in range(2)]
        successes = [None, None, None]

        with patch("haystack.document_stores.opensearch.bulk") as bulk_mock:
            bulk_mock.side_effect = failures + successes
            mocked_document_store._bulk(documents=docs_to_write, _timeout=0, _remaining_tries=3)
            assert bulk_mock.call_count == 5

    @pytest.mark.unit
    def test_get_document_by_id_return_embedding_false(self, mocked_document_store):
        """
        With `return_embedding` disabled, the search body built by `get_document_by_id`
        must exclude the `embedding` field from `_source`.
        """
        mocked_document_store.return_embedding = False

        mocked_document_store.get_document_by_id("123")

        # the `_source` part of the search body must exclude the embedding
        search_kwargs = mocked_document_store.client.search.call_args[1]
        source_filter = search_kwargs["body"]["_source"]
        assert source_filter == {"excludes": ["embedding"]}

    @pytest.mark.unit
    def test_get_document_by_id_excluded_meta_data_has_no_influence(self, mocked_document_store):
        """
        Setting `excluded_meta_data` must not change the `_source` filter of the search
        body built by `get_document_by_id`: only `embedding` is excluded.
        """
        mocked_document_store.excluded_meta_data = ["foo"]
        mocked_document_store.return_embedding = False

        mocked_document_store.get_document_by_id("123")

        # the `_source` part of the search body must not be affected by `excluded_meta_data`
        search_kwargs = mocked_document_store.client.search.call_args[1]
        source_filter = search_kwargs["body"]["_source"]
        assert source_filter == {"excludes": ["embedding"]}

    @pytest.mark.unit
    def test_write_documents_req_for_each_batch(self, mocked_document_store, documents):
        """
        Writing the `documents` fixture with a batch size of 2 must result in 5 calls
        to the patched `bulk` helper.
        """
        mocked_document_store.batch_size = 2
        with patch("haystack.document_stores.opensearch.bulk") as mocked_bulk:
            mocked_document_store.write_documents(documents)
            # presumably one bulk request per batch; the expected count depends on the size
            # of the `documents` fixture, which is defined elsewhere (TODO confirm)
            assert mocked_bulk.call_count == 5