Document Store test refactoring (#3449)

* add new marker

* start using test hierarchies

* move ES tests into their own class

* refactor test workflow

* job steps

* add more tests

* move more tests

* more tests

* test labels

* add more tests

* Update tests.yml

* Update tests.yml

* fix

* typo

* fix es image tag

* map es ports

* try

* fix

* default port

* remove opensearch from the markers sorcery

* revert

* skip new tests in old jobs

* skip opensearch_faiss
Authored by Massimiliano Pippi on 2022-10-31 15:30:14 +01:00, committed by GitHub
parent 85cdc1040a
commit b694c7b5cb
9 changed files with 845 additions and 560 deletions
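Taken together, the bullets above describe one pattern: shared Document Store tests move into abstract base classes (no `Test` prefix, so pytest never collects them directly), and each store-specific suite inherits them, supplying only a `ds` fixture. A minimal sketch of that pattern with a hypothetical in-memory suite (class names here are illustrative, not part of the diff):

import pytest
from haystack.schema import Document
from haystack.document_stores import InMemoryDocumentStore


@pytest.mark.document_store
class MyDocumentStoreTestAbstract:
    # No `Test` prefix: pytest collects these methods only through subclasses.
    @pytest.fixture
    def documents(self):
        return [Document(content=f"doc {i}") for i in range(3)]

    @pytest.mark.integration
    def test_write_documents(self, ds, documents):
        ds.write_documents(documents)
        assert ds.get_document_count() == len(documents)


class TestInMemoryDocumentStore(MyDocumentStoreTestAbstract):
    # A concrete suite only has to provide the `ds` fixture.
    @pytest.fixture
    def ds(self):
        return InMemoryDocumentStore()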

View File

@@ -92,17 +92,22 @@ jobs:
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
unit-tests:
name: Unit / ${{ matrix.os }}
name: Unit / ${{ matrix.topic }} / ${{ matrix.os }}
needs:
- mypy
- pylint
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os:
- ubuntu-latest
- windows-latest
- macos-latest
topic:
- document_stores
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Setup Python
uses: ./.github/actions/python_cache/
@@ -111,7 +116,7 @@ jobs:
run: pip install .[all]
- name: Run
run: pytest -m "unit" test/
run: pytest -m "unit" test/${{ matrix.topic }}
- uses: act10ns/slack@v1
with:
@@ -119,6 +124,86 @@ jobs:
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
integration-tests-elasticsearch:
name: Integration / Elasticsearch / ${{ matrix.os }}
needs:
- unit-tests
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
services:
elasticsearch:
image: elasticsearch:7.17.6
env:
discovery.type: "single-node"
ES_JAVA_OPTS: "-Xms128m -Xmx256m"
ports:
- 9200:9200
# env:
# ELASTICSEARCH_HOST: "elasticsearch"
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: ./.github/actions/python_cache/
- name: Install Haystack
run: pip install -U .[docstores]
- name: Run tests
run: |
pytest -x -m "document_store and integration" test/document_stores/test_elasticsearch.py
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
integration-tests-opensearch:
name: Integration / Opensearch / ${{ matrix.os }}
needs:
- unit-tests
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
services:
opensearch:
image: opensearchproject/opensearch:1.3.5
env:
discovery.type: "single-node"
ES_JAVA_OPTS: "-Xms128m -Xmx256m"
ports:
- 9200:9200
# env:
# OPENSEARCH_HOST: "opensearch"
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: ./.github/actions/python_cache/
- name: Install Haystack
run: pip install -U .[docstores]
- name: Run tests
run: |
pytest -x -m "document_store and integration" test/document_stores/test_opensearch.py
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
#
# TODO: the following steps need to be revisited
#
unit-tests-linux:
needs:
- mypy
@@ -216,117 +301,6 @@ jobs:
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
elasticsearch-tests-linux:
needs:
- mypy
- pylint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Setup Elasticsearch
run: |
docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
# TODO Let's try to remove this one from the unit tests
- name: Install pdftotext
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
- name: Setup Python
uses: ./.github/actions/python_cache/
- name: Install Haystack
run: pip install .
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ --document_store_type=elasticsearch
- name: Dump docker logs on failure
if: failure()
uses: jwalton/gh-docker-logs@v1
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
elasticsearch-tests-windows:
needs:
- mypy
- pylint
runs-on: windows-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: |
choco install --no-progress xpdf-utils
choco install --no-progress openjdk --version=11.0.2.01
refreshenv
choco install --no-progress elasticsearch --version=7.9.2
refreshenv
Get-Service elasticsearch-service-x64 | Start-Service
- name: Setup Python
uses: ./.github/actions/python_cache/
with:
prefix: windows
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "elasticsearch and not integration" test/document_stores/ ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} --document_store_type=elasticsearch
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
opensearch-tests-linux:
needs:
- mypy
- pylint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Setup Opensearch
run: |
docker run -d -p 9201:9200 -p 9600:9600 -e "discovery.type=single-node" opensearchproject/opensearch:1.3.5
# TODO Let's try to remove this one from the unit tests
- name: Install pdftotext
run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
- name: Setup Python
uses: ./.github/actions/python_cache/
- name: Install Haystack
run: pip install .
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false'
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "opensearch and not integration" test/document_stores/test_document_store.py --document_store_type=opensearch
- name: Dump docker logs on failure
if: failure()
uses: jwalton/gh-docker-logs@v1
- uses: act10ns/slack@v1
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
faiss-tests-linux:
needs:
@@ -656,7 +630,6 @@ jobs:
integration-tests-linux:
needs:
- unit-tests-linux
- elasticsearch-tests-linux
timeout-minutes: 60
strategy:
@@ -691,7 +664,6 @@ jobs:
run: |
python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/bart_lfqa','yjernite/bart_eli5', 'vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'elastic/distilbert-base-cased-finetuned-conll03-english', 'deepset/bert-medium-squad2-distilled']]"
- name: Run Elasticsearch
run: |
docker run -d -p 9200:9200 -e "discovery.type=single-node" -e "ES_JAVA_OPTS=-Xms128m -Xmx256m" elasticsearch:7.9.2
@@ -738,8 +710,9 @@ jobs:
- name: Run tests
env:
TOKENIZERS_PARALLELISM: 'false' # Avoid logspam by tokenizers
# we add "and not document_store" to exclude the tests that were ported to the new strategy
run: |
pytest ${{ env.PYTEST_PARAMS }} -m "integration" test/${{ matrix.folder }}
pytest ${{ env.PYTEST_PARAMS }} -m "integration and not document_store" test/${{ matrix.folder }}
- name: Dump docker logs on failure
if: failure()
@@ -754,7 +727,6 @@ jobs:
integration-tests-windows:
needs:
- unit-tests-windows
- elasticsearch-tests-windows
runs-on: windows-latest
if: contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
@@ -800,4 +772,4 @@ jobs:
with:
status: ${{ job.status }}
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

View File

@@ -2,7 +2,7 @@ def pytest_addoption(parser):
parser.addoption(
"--document_store_type",
action="store",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone, opensearch",
default="elasticsearch, faiss, sql, memory, milvus1, milvus, weaviate, pinecone",
)
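(`opensearch` drops out of the default list because its tests now run through the new class-based suites instead of the `--document_store_type` machinery.)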

View File

@@ -351,6 +351,7 @@ markers = [
"milvus: requires a Milvus 2 setup",
"milvus1: requires a Milvus 1 container",
"opensearch",
"document_store",
]
log_cli = true
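The new `document_store` marker is what the refactored CI jobs select on, e.g. `pytest -m "document_store and integration" test/document_stores/test_opensearch.py` in the workflow above.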

View File

@@ -152,7 +152,6 @@ def pytest_collection_modifyitems(config, items):
"pinecone": [pytest.mark.pinecone],
# FIXME GraphDB can't be treated as a regular docstore, it fails most of their tests
"graphdb": [pytest.mark.integration],
"opensearch": [pytest.mark.opensearch],
}
for item in items:
for name, markers in name_to_markers.items():
@@ -196,17 +195,7 @@ def infer_required_doc_store(item, keywords):
# 2. if the test name contains the docstore name, we use that
# 3. use an arbitrary one by calling set.pop()
required_doc_store = None
all_doc_stores = {
"elasticsearch",
"faiss",
"sql",
"memory",
"milvus1",
"milvus",
"weaviate",
"pinecone",
"opensearch",
}
all_doc_stores = {"elasticsearch", "faiss", "sql", "memory", "milvus1", "milvus", "weaviate", "pinecone"}
docstore_markers = set(keywords).intersection(all_doc_stores)
if len(docstore_markers) > 1:
# if parameterized infer the docstore from the parameter
@@ -1109,18 +1098,6 @@ def get_document_store(
knn_engine="faiss",
)
elif document_store_type == "opensearch":
document_store = OpenSearchDocumentStore(
index=index,
return_embedding=True,
embedding_dim=embedding_dim,
embedding_field=embedding_field,
similarity=similarity,
recreate_index=recreate_index,
port=9201,
knn_engine="nmslib",
)
else:
raise Exception(f"No document store fixture for '{document_store_type}'")

View File

@@ -0,0 +1,445 @@
import pytest
import numpy as np
from haystack.schema import Document, Label, Answer
from haystack.errors import DuplicateDocumentError
from haystack.document_stores import BaseDocumentStore
@pytest.mark.document_store
class DocumentStoreBaseTestAbstract:
"""
This base class tests the abstract methods of BaseDocumentStore and is meant to be inherited by every
Document Store test suite. It doesn't have the `Test` prefix in its name, so its methods are not collected
for this class itself but only for its subclasses.
"""
@pytest.fixture
def documents(self):
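# Build 9 documents: for each of 3 iterations, a "Foo" doc (year 2020),
# a "Bar" doc (year 2021), and one without an embedding (month "03").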
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={"name": f"name_{i}", "year": "2020", "month": "01", "numbers": [2, 4]},
embedding=np.random.rand(768).astype(np.float32),
)
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={"name": f"name_{i}", "year": "2021", "month": "02", "numbers": [-2, -4]},
embedding=np.random.rand(768).astype(np.float32),
)
)
documents.append(
Document(
content=f"Document {i} without embeddings",
meta={"name": f"name_{i}", "no_embedding": True, "month": "03"},
)
)
return documents
@pytest.fixture
def labels(self, documents):
labels = []
for i, d in enumerate(documents):
labels.append(
Label(
query=f"query_{i}",
document=d,
is_correct_document=True,
is_correct_answer=False,
# create a mixed set of labels
origin="user-feedback" if i % 2 else "gold-label",
answer=None if not i else Answer(f"the answer is {i}"),
meta={"name": f"label_{i}", "year": f"{2020 + i}"},
)
)
return labels
#
# Integration tests
#
@pytest.mark.integration
def test_write_documents(self, ds, documents):
ds.write_documents(documents)
docs = ds.get_all_documents()
assert len(docs) == len(documents)
for i, doc in enumerate(docs):
expected = documents[i]
assert doc.id == expected.id
@pytest.mark.integration
def test_write_labels(self, ds, labels):
ds.write_labels(labels)
assert ds.get_all_labels() == labels
@pytest.mark.integration
def test_write_with_duplicate_doc_ids(self, ds):
duplicate_documents = [
Document(content="Doc1", id_hash_keys=["content"]),
Document(content="Doc1", id_hash_keys=["content"]),
]
ds.write_documents(duplicate_documents, duplicate_documents="skip")
assert len(ds.get_all_documents()) == 1
with pytest.raises(Exception):
ds.write_documents(duplicate_documents, duplicate_documents="fail")
@pytest.mark.skip
@pytest.mark.integration
def test_get_all_documents_without_filters(self, ds, documents):
ds.write_documents(documents)
out = ds.get_all_documents()
assert out == documents
@pytest.mark.integration
def test_get_all_document_filter_duplicate_text_value(self, ds):
documents = [
Document(content="duplicated", meta={"meta_field": "0"}, id_hash_keys=["meta"]),
Document(content="duplicated", meta={"meta_field": "1", "name": "file.txt"}, id_hash_keys=["meta"]),
Document(content="Doc2", meta={"name": "file_2.txt"}, id_hash_keys=["meta"]),
]
ds.write_documents(documents)
documents = ds.get_all_documents(filters={"meta_field": ["1"]})
assert len(documents) == 1
assert documents[0].content == "duplicated"
assert documents[0].meta["name"] == "file.txt"
documents = ds.get_all_documents(filters={"meta_field": ["0"]})
assert len(documents) == 1
assert documents[0].content == "duplicated"
assert documents[0].meta.get("name") is None
documents = ds.get_all_documents(filters={"name": ["file_2.txt"]})
assert len(documents) == 1
assert documents[0].content == "Doc2"
assert documents[0].meta.get("meta_field") is None
@pytest.mark.integration
def test_get_all_documents_with_correct_filters(self, ds, documents):
ds.write_documents(documents)
result = ds.get_all_documents(filters={"year": ["2020"]})
assert len(result) == 3
documents = ds.get_all_documents(filters={"year": ["2020", "2021"]})
assert len(documents) == 6
@pytest.mark.integration
def test_get_all_documents_with_incorrect_filter_name(self, ds, documents):
ds.write_documents(documents)
result = ds.get_all_documents(filters={"non_existing_meta_field": ["whatever"]})
assert len(result) == 0
@pytest.mark.integration
def test_get_all_documents_with_incorrect_filter_value(self, ds, documents):
ds.write_documents(documents)
result = ds.get_all_documents(filters={"year": ["nope"]})
assert len(result) == 0
@pytest.mark.integration
def test_extended_filter(self, ds, documents):
ds.write_documents(documents)
# Test comparison operators individually
result = ds.get_all_documents(filters={"year": {"$eq": "2020"}})
assert len(result) == 3
result = ds.get_all_documents(filters={"year": "2020"})
assert len(result) == 3
result = ds.get_all_documents(filters={"year": {"$in": ["2020", "2021", "n.a."]}})
assert len(result) == 6
result = ds.get_all_documents(filters={"year": ["2020", "2021", "n.a."]})
assert len(result) == 6
result = ds.get_all_documents(filters={"year": {"$ne": "2020"}})
assert len(result) == 6
result = ds.get_all_documents(filters={"year": {"$nin": ["2020", "2021", "n.a."]}})
assert len(result) == 3
result = ds.get_all_documents(filters={"numbers": {"$gt": 0}})
assert len(result) == 3
result = ds.get_all_documents(filters={"numbers": {"$gte": -2}})
assert len(result) == 6
result = ds.get_all_documents(filters={"numbers": {"$lt": 0}})
assert len(result) == 3
result = ds.get_all_documents(filters={"numbers": {"$lte": 2.0}})
assert len(result) == 6
# Test compound filters
result = ds.get_all_documents(filters={"year": {"$lte": "2021", "$gte": "2020"}})
assert len(result) == 6
filters = {"$and": {"year": {"$lte": "2021", "$gte": "2020"}, "name": {"$in": ["name_0", "name_1"]}}}
result = ds.get_all_documents(filters=filters)
assert len(result) == 4
filters_simplified = {"year": {"$lte": "2021", "$gte": "2020"}, "name": ["name_0", "name_1"]}
result = ds.get_all_documents(filters=filters_simplified)
assert len(result) == 4
filters = {
"$and": {
"year": {"$lte": "2021", "$gte": "2020"},
"$or": {"name": {"$in": ["name_0", "name_1"]}, "numbers": {"$lt": 5.0}},
}
}
result = ds.get_all_documents(filters=filters)
assert len(result) == 6
filters_simplified = {
"year": {"$lte": "2021", "$gte": "2020"},
"$or": {"name": {"$in": ["name_0", "name_2"]}, "numbers": {"$lt": 5.0}},
}
result = ds.get_all_documents(filters=filters_simplified)
assert len(result) == 6
filters = {
"$and": {
"year": {"$lte": "2021", "$gte": "2020"},
"$or": {
"name": {"$in": ["name_0", "name_1"]},
"$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}},
},
}
}
result = ds.get_all_documents(filters=filters)
assert len(result) == 5
filters_simplified = {
"year": {"$lte": "2021", "$gte": "2020"},
"$or": {"name": ["name_0", "name_1"], "$and": {"numbers": {"$lt": 5.0}, "$not": {"month": {"$eq": "01"}}}},
}
result = ds.get_all_documents(filters=filters_simplified)
assert len(result) == 5
# Test nested logical operations within "$not", important because we apply De Morgan's laws in WeaviateDocumentStore
filters = {
"$not": {
"$or": {
"$and": {"numbers": {"$lt": 5.0}, "month": {"$ne": "01"}},
"$not": {"year": {"$lte": "2021", "$gte": "2020"}},
}
}
}
result = ds.get_all_documents(filters=filters)
docs_meta = result[0].meta["numbers"]
assert len(result) == 3
assert [2, 4] == docs_meta
# Test same logical operator twice on same level
filters = {
"$or": [
{"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$gte": "2020"}}},
{"$and": {"name": {"$in": ["name_0", "name_1"]}, "year": {"$lt": "2021"}}},
]
}
result = ds.get_all_documents(filters=filters)
docs_meta = [doc.meta["name"] for doc in result]
assert len(result) == 4
assert "name_0" in docs_meta
assert "name_2" not in docs_meta
@pytest.mark.integration
def test_get_document_by_id(self, ds, documents):
ds.write_documents(documents)
doc = ds.get_document_by_id(documents[0].id)
assert doc.id == documents[0].id
assert doc.content == documents[0].content
@pytest.mark.integration
def test_get_documents_by_id(self, ds, documents):
ds.write_documents(documents)
ids = [doc.id for doc in documents]
result = {doc.id for doc in ds.get_documents_by_id(ids, batch_size=2)}
assert set(ids) == result
@pytest.mark.integration
def test_get_document_count(self, ds, documents):
ds.write_documents(documents)
assert ds.get_document_count() == 9
assert ds.get_document_count(filters={"year": ["2020"]}) == 3
assert ds.get_document_count(filters={"month": ["02"]}) == 3
@pytest.mark.integration
def test_get_all_documents_generator(self, ds, documents):
ds.write_documents(documents)
assert len(list(ds.get_all_documents_generator(batch_size=2))) == 9
@pytest.mark.integration
def test_duplicate_documents_skip(self, ds, documents):
ds.write_documents(documents)
updated_docs = []
for d in documents:
updated_d = Document.from_dict(d.to_dict())
updated_d.meta["name"] = "Updated"
updated_docs.append(updated_d)
ds.write_documents(updated_docs, duplicate_documents="skip")
result = ds.get_all_documents()
assert result[0].meta["name"] == "name_0"
@pytest.mark.integration
def test_duplicate_documents_overwrite(self, ds, documents):
ds.write_documents(documents)
updated_docs = []
for d in documents:
updated_d = Document.from_dict(d.to_dict())
updated_d.meta["name"] = "Updated"
updated_docs.append(updated_d)
ds.write_documents(updated_docs, duplicate_documents="overwrite")
for doc in ds.get_all_documents():
assert doc.meta["name"] == "Updated"
@pytest.mark.integration
def test_duplicate_documents_fail(self, ds, documents):
ds.write_documents(documents)
updated_docs = []
for d in documents:
updated_d = Document.from_dict(d.to_dict())
updated_d.meta["name"] = "Updated"
updated_docs.append(updated_d)
with pytest.raises(DuplicateDocumentError):
ds.write_documents(updated_docs, duplicate_documents="fail")
@pytest.mark.integration
def test_write_document_meta(self, ds):
ds.write_documents(
[
{"content": "dict_without_meta", "id": "1"},
{"content": "dict_with_meta", "meta_field": "test2", "id": "2"},
Document(content="document_object_without_meta", id="3"),
Document(content="document_object_with_meta", meta={"meta_field": "test4"}, id="4"),
]
)
assert not ds.get_document_by_id("1").meta
assert ds.get_document_by_id("2").meta["meta_field"] == "test2"
assert not ds.get_document_by_id("3").meta
assert ds.get_document_by_id("4").meta["meta_field"] == "test4"
@pytest.mark.integration
def test_delete_documents(self, ds, documents):
ds.write_documents(documents)
ds.delete_documents()
assert ds.get_document_count() == 0
@pytest.mark.integration
def test_delete_documents_with_filters(self, ds, documents):
ds.write_documents(documents)
ds.delete_documents(filters={"year": ["2020", "2021"]})
documents = ds.get_all_documents()
assert ds.get_document_count() == 3
@pytest.mark.integration
def test_delete_documents_by_id(self, ds, documents):
ds.write_documents(documents)
docs_to_delete = ds.get_all_documents(filters={"year": ["2020"]})
ds.delete_documents(ids=[doc.id for doc in docs_to_delete])
assert ds.get_document_count() == 6
@pytest.mark.integration
def test_write_get_all_labels(self, ds, labels):
ds.write_labels(labels)
ds.write_labels(labels[:3], index="custom_index")
assert len(ds.get_all_labels()) == 9
assert len(ds.get_all_labels(index="custom_index")) == 3
# remove the index we created in this test
ds.delete_index("custom_index")
@pytest.mark.integration
def test_delete_labels(self, ds, labels):
ds.write_labels(labels)
ds.write_labels(labels[:3], index="custom_index")
ds.delete_labels()
ds.delete_labels(index="custom_index")
assert len(ds.get_all_labels()) == 0
assert len(ds.get_all_labels(index="custom_index")) == 0
# remove the index we created in this test
ds.delete_index("custom_index")
@pytest.mark.integration
def test_write_labels_duplicate(self, ds, labels):
# create a duplicate
dupe = Label.from_dict(labels[0].to_dict())
ds.write_labels(labels + [dupe])
# ensure the duplicate was discarded
assert len(ds.get_all_labels()) == len(labels)
@pytest.mark.integration
def test_delete_labels_by_id(self, ds, labels):
ds.write_labels(labels)
ds.delete_labels(ids=[labels[0].id])
assert len(ds.get_all_labels()) == len(labels) - 1
@pytest.mark.integration
def test_delete_labels_by_filter(self, ds, labels):
ds.write_labels(labels)
ds.delete_labels(filters={"query": "query_1"})
assert len(ds.get_all_labels()) == len(labels) - 1
@pytest.mark.integration
def test_delete_labels_by_filter_id(self, ds, labels):
ds.write_labels(labels)
# ids and filters are ANDed, the following should have no effect
ds.delete_labels(ids=[labels[0].id], filters={"query": "query_9"})
assert len(ds.get_all_labels()) == len(labels)
#
ds.delete_labels(ids=[labels[0].id], filters={"query": "query_0"})
assert len(ds.get_all_labels()) == len(labels) - 1
@pytest.mark.integration
def test_get_label_count(self, ds, labels):
ds.write_labels(labels)
assert ds.get_label_count() == len(labels)
@pytest.mark.integration
def test_delete_index(self, ds, documents):
ds.write_documents(documents, index="custom_index")
assert ds.get_document_count(index="custom_index") == len(documents)
ds.delete_index(index="custom_index")
with pytest.raises(Exception):
ds.get_document_count(index="custom_index")
@pytest.mark.integration
def test_update_meta(self, ds, documents):
ds.write_documents(documents)
doc = documents[0]
ds.update_document_meta(doc.id, meta={"year": "2099", "month": "12"})
doc = ds.get_document_by_id(doc.id)
assert doc.meta["year"] == "2099"
assert doc.meta["month"] == "12"
#
# Unit tests
#
@pytest.mark.unit
def test_normalize_embeddings_diff_shapes(self):
VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32")
BaseDocumentStore.normalize_embedding(VEC_1)
assert np.abs(np.linalg.norm(VEC_1) - 1) < 0.01
VEC_1 = np.array([0.1, 0.2, 0.3], dtype="float32").reshape(1, -1)
BaseDocumentStore.normalize_embedding(VEC_1)
assert np.abs(np.linalg.norm(VEC_1) - 1) < 0.01

View File

@@ -77,84 +77,6 @@ DOCUMENTS = [
]
@pytest.mark.elasticsearch
def test_init_elastic_client():
# defaults
_ = ElasticsearchDocumentStore()
# list of hosts + single port
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=9200)
# list of hosts + list of ports (wrong)
with pytest.raises(Exception):
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200])
# list of hosts + list
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200, 9200])
# only api_key
with pytest.raises(Exception):
_ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test")
# api_key + id
_ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test")
@pytest.mark.elasticsearch
def test_init_elastic_doc_store_with_index_recreation():
index_name = "test_index_recreation"
label_index_name = "test_index_recreation_labels"
document_store = ElasticsearchDocumentStore(index=index_name, label_index=label_index_name)
documents = [Document(content="Doc1")]
labels = [
Label(
query="query",
document=documents[0],
is_correct_document=True,
is_correct_answer=False,
origin="user-feedback",
answer=None,
)
]
document_store.write_documents(documents, index=index_name)
document_store.write_labels(labels, index=label_index_name)
document_store = ElasticsearchDocumentStore(index=index_name, label_index=label_index_name, recreate_index=True)
docs = document_store.get_all_documents(index=index_name)
labels = document_store.get_all_labels(index=label_index_name)
assert len(docs) == 0
assert len(labels) == 0
@pytest.mark.elasticsearch
def test_elasticsearch_eq_filter():
documents = [
{"content": "some text", "id": "1", "keyword_field": ["x", "y", "z"], "number_field": [1, 2, 3, 4]},
{"content": "some text", "id": "2", "keyword_field": ["x", "y", "w"], "number_field": [1, 2, 3]},
{"content": "some text", "id": "3", "keyword_field": ["x", "z"], "number_field": [2, 4]},
{"content": "some text", "id": "4", "keyword_field": ["z", "x"], "number_field": [5, 6]},
{"content": "some text", "id": "5", "keyword_field": ["x", "y"], "number_field": [2, 3]},
]
index = "test_elasticsearch_eq_filter"
document_store = ElasticsearchDocumentStore(index=index, recreate_index=True)
document_store.write_documents(documents)
filter = {"keyword_field": {"$eq": ["z", "x"]}}
filtered_docs = document_store.get_all_documents(index=index, filters=filter)
assert len(filtered_docs) == 2
for doc in filtered_docs:
assert set(doc.meta["keyword_field"]) == {"x", "z"}
filter = {"number_field": {"$eq": [2, 3]}}
filtered_docs = document_store.query(query=None, index=index, filters=filter)
assert len(filtered_docs) == 1
assert filtered_docs[0].meta["number_field"] == [2, 3]
assert filtered_docs[0].id == "5"
def test_write_with_duplicate_doc_ids(document_store: BaseDocumentStore):
duplicate_documents = [
Document(content="Doc1", id_hash_keys=["content"]),
@@ -1274,164 +1196,6 @@ def test_get_meta_values_by_key(document_store: BaseDocumentStore):
assert bucket["count"] == 1
@pytest.mark.elasticsearch
def test_elasticsearch_custom_fields():
document_store = ElasticsearchDocumentStore(
index="haystack_test_custom",
content_field="custom_text_field",
embedding_field="custom_embedding_field",
recreate_index=True,
)
doc_to_write = {"custom_text_field": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
document_store.write_documents([doc_to_write])
documents = document_store.get_all_documents(return_embedding=True)
assert len(documents) == 1
assert documents[0].content == "test"
np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
@pytest.mark.elasticsearch
def test_elasticsearch_delete_index():
client = Elasticsearch()
index_name = "haystack_test_deletion"
document_store = ElasticsearchDocumentStore(index=index_name)
# the index should exist
index_exists = client.indices.exists(index=index_name)
assert index_exists
document_store.delete_index(index_name)
# the index was deleted and should not exist
index_exists = client.indices.exists(index=index_name)
assert not index_exists
@pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
def test_elasticsearch_query_with_filters_and_missing_embeddings(document_store: ElasticsearchDocumentStore):
document_store.write_documents(DOCUMENTS)
document_without_embedding = Document(
content="Doc without embedding", meta={"name": "name_7", "year": "2021", "month": "04"}
)
document_store.write_documents([document_without_embedding])
filters = {"year": "2021"}
document_store.skip_missing_embeddings = False
with pytest.raises(RequestError):
document_store.query_by_embedding(np.random.rand(768), filters=filters)
document_store.skip_missing_embeddings = True
documents = document_store.query_by_embedding(np.random.rand(768), filters=filters)
assert len(documents) == 3
@pytest.mark.elasticsearch
def test_get_document_count_only_documents_without_embedding_arg():
documents = [
{
"content": "text1",
"id": "1",
"embedding": np.random.rand(768).astype(np.float32),
"meta_field_for_count": "a",
},
{
"content": "text2",
"id": "2",
"embedding": np.random.rand(768).astype(np.float64),
"meta_field_for_count": "b",
},
{"content": "text3", "id": "3", "embedding": np.random.rand(768).astype(np.float32).tolist()},
{"content": "text4", "id": "4", "meta_field_for_count": "b"},
{"content": "text5", "id": "5", "meta_field_for_count": "b"},
{"content": "text6", "id": "6", "meta_field_for_count": "c"},
{
"content": "text7",
"id": "7",
"embedding": np.random.rand(768).astype(np.float64),
"meta_field_for_count": "c",
},
]
_index: str = "haystack_test_count"
document_store = ElasticsearchDocumentStore(index=_index, recreate_index=True)
document_store.write_documents(documents)
assert document_store.get_document_count() == 7
assert document_store.get_document_count(only_documents_without_embedding=True) == 3
assert (
document_store.get_document_count(
only_documents_without_embedding=True, filters={"meta_field_for_count": ["c"]}
)
== 1
)
assert (
document_store.get_document_count(
only_documents_without_embedding=True, filters={"meta_field_for_count": ["b"]}
)
== 2
)
@pytest.mark.elasticsearch
def test_skip_missing_embeddings(caplog):
documents = [
{"content": "text1", "id": "1"}, # a document without embeddings
{"content": "text2", "id": "2", "embedding": np.random.rand(768).astype(np.float64)},
{"content": "text3", "id": "3", "embedding": np.random.rand(768).astype(np.float32).tolist()},
{"content": "text4", "id": "4", "embedding": np.random.rand(768).astype(np.float32)},
]
document_store = ElasticsearchDocumentStore(index="skip_missing_embedding_index", recreate_index=True)
document_store.write_documents(documents)
document_store.skip_missing_embeddings = True
retrieved_docs = document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
assert len(retrieved_docs) == 3
document_store.skip_missing_embeddings = False
with pytest.raises(RequestError):
document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
# Test scenario with no embeddings for the entire index
documents = [
{"content": "text1", "id": "1"},
{"content": "text2", "id": "2"},
{"content": "text3", "id": "3"},
{"content": "text4", "id": "4"},
]
document_store.delete_documents()
document_store.write_documents(documents)
document_store.skip_missing_embeddings = True
with caplog.at_level(logging.WARNING):
document_store.query_by_embedding(np.random.rand(768).astype(np.float32))
assert "No documents with embeddings. Run the document store's update_embeddings() method." in caplog.text
@pytest.mark.elasticsearch
def test_elasticsearch_synonyms():
synonyms = ["i-pod, i pod, ipod", "sea biscuit, sea biscit, seabiscuit", "foo, foo bar, baz"]
synonym_type = "synonym_graph"
client = Elasticsearch()
client.indices.delete(index="haystack_synonym_arg", ignore=[404])
document_store = ElasticsearchDocumentStore(
index="haystack_synonym_arg", synonyms=synonyms, synonym_type=synonym_type
)
indexed_settings = client.indices.get_settings(index="haystack_synonym_arg")
assert (
synonym_type
== indexed_settings["haystack_synonym_arg"]["settings"]["index"]["analysis"]["filter"]["synonym"]["type"]
)
assert (
synonyms
== indexed_settings["haystack_synonym_arg"]["settings"]["index"]["analysis"]["filter"]["synonym"]["synonyms"]
)
@pytest.mark.parametrize(
"document_store_with_docs", ["memory", "faiss", "milvus1", "weaviate", "elasticsearch"], indirect=True
)
@@ -1980,105 +1744,6 @@ def test_DeepsetCloudDocumentStore_query_without_index():
assert document_store.query(query="some query") == []
@pytest.mark.elasticsearch
def test_elasticsearch_search_field_mapping():
client = Elasticsearch()
client.indices.delete(index="haystack_search_field_mapping", ignore=[404])
index_data = [
{
"title": "Green tea components",
"meta": {
"content": "The green tea plant contains a range of healthy compounds that make it into the final drink",
"sub_content": "Drink tip",
},
"id": "1",
},
{
"title": "Green tea catechin",
"meta": {
"content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).",
"sub_content": "Ingredients tip",
},
"id": "2",
},
{
"title": "Minerals in Green tea",
"meta": {
"content": "Green tea also has small amounts of minerals that can benefit your health.",
"sub_content": "Minerals tip",
},
"id": "3",
},
{
"title": "Green tea Benefits",
"meta": {
"content": "Green tea does more than just keep you alert, it may also help boost brain function.",
"sub_content": "Health tip",
},
"id": "4",
},
]
document_store = ElasticsearchDocumentStore(
index="haystack_search_field_mapping", search_fields=["content", "sub_content"], content_field="title"
)
document_store.write_documents(index_data)
indexed_settings = client.indices.get_mapping(index="haystack_search_field_mapping")
assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["content"]["type"] == "text"
assert indexed_settings["haystack_search_field_mapping"]["mappings"]["properties"]["sub_content"]["type"] == "text"
@pytest.mark.elasticsearch
def test_elasticsearch_existing_alias():
client = Elasticsearch()
client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
client.indices.create(index="haystack_existing_alias_1", body=settings)
client.indices.create(index="haystack_existing_alias_2", body=settings)
client.indices.put_alias(
index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
)
# To be valid, all indices related to the alias must have content field of type text
_ = ElasticsearchDocumentStore(index="haystack_existing_alias", search_fields=["content"])
@pytest.mark.elasticsearch
def test_elasticsearch_existing_alias_missing_fields():
client = Elasticsearch()
client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
right_settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
wrong_settings = {"mappings": {"properties": {"content": {"type": "histogram"}}}}
client.indices.create(index="haystack_existing_alias_1", body=right_settings)
client.indices.create(index="haystack_existing_alias_2", body=wrong_settings)
client.indices.put_alias(
index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
)
with pytest.raises(Exception):
# wrong field type for "content" in index "haystack_existing_alias_2"
_ = ElasticsearchDocumentStore(
index="haystack_existing_alias", search_fields=["content"], content_field="title"
)
@pytest.mark.parametrize("document_store_with_docs", ["elasticsearch"], indirect=True)
def test_elasticsearch_brownfield_support(document_store_with_docs):
new_document_store = InMemoryDocumentStore()
@@ -2122,9 +1787,7 @@ def test_elasticsearch_brownfield_support(document_store_with_docs):
@pytest.mark.parametrize(
"document_store",
["faiss", "milvus1", "milvus", "weaviate", "opensearch_faiss", "opensearch", "elasticsearch", "memory"],
indirect=True,
"document_store", ["faiss", "milvus1", "milvus", "weaviate", "opensearch", "elasticsearch", "memory"], indirect=True
)
def test_cosine_similarity(document_store: BaseDocumentStore):
# below we will write documents to the store and then query it to see if vectors were normalized or not
@@ -2166,9 +1829,7 @@ def test_cosine_similarity(document_store: BaseDocumentStore):
@pytest.mark.parametrize(
"document_store",
["faiss", "milvus1", "milvus", "weaviate", "opensearch_faiss", "opensearch", "elasticsearch", "memory"],
indirect=True,
"document_store", ["faiss", "milvus1", "milvus", "weaviate", "opensearch", "elasticsearch", "memory"], indirect=True
)
def test_update_embeddings_cosine_similarity(document_store: BaseDocumentStore):
# below we will write documents to the store and then query it to see if vectors were normalized
@@ -2228,7 +1889,7 @@ def test_update_embeddings_cosine_similarity(document_store: BaseDocumentStore):
@pytest.mark.parametrize(
"document_store_small",
["faiss", "milvus1", "milvus", "weaviate", "memory", "elasticsearch", "opensearch", "opensearch_faiss"],
["faiss", "milvus1", "milvus", "weaviate", "memory", "elasticsearch", "opensearch"],
indirect=True,
)
def test_cosine_sanity_check(document_store_small):

View File

@@ -0,0 +1,225 @@
import os
import pytest
import numpy as np
from haystack.schema import Document
from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore
from .test_base import DocumentStoreBaseTestAbstract
from .test_search_engine import SearchEngineDocumentStoreTestAbstract
class TestElasticsearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract):
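# By inheriting both abstract suites, every generic test from test_base.py and
# test_search_engine.py also runs against Elasticsearch alongside the tests below.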
# Constants
index_name = __name__
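# Using the module name as the index name gives each test module its own isolated index.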
@pytest.fixture
def ds(self):
"""
This fixture provides a working document store and takes care of removing the indices when done
"""
labels_index_name = f"{self.index_name}_labels"
ds = ElasticsearchDocumentStore(
index=self.index_name,
label_index=labels_index_name,
host=os.environ.get("ELASTICSEARCH_HOST", "localhost"),
create_index=True,
)
yield ds
ds.delete_index(self.index_name)
ds.delete_index(labels_index_name)
@pytest.mark.integration
def test___init__(self):
# defaults
_ = ElasticsearchDocumentStore()
# list of hosts + single port
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=9200)
# list of hosts + list of ports (wrong)
with pytest.raises(Exception):
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200])
# list of hosts + list
_ = ElasticsearchDocumentStore(host=["localhost", "127.0.0.1"], port=[9200, 9200])
# only api_key
with pytest.raises(Exception):
_ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test")
# api_key + id
_ = ElasticsearchDocumentStore(host=["localhost"], port=[9200], api_key="test", api_key_id="test")
@pytest.mark.integration
def test_recreate_index(self, ds, documents, labels):
ds.write_documents(documents)
ds.write_labels(labels)
# Create another document store on top of the previous one
ds = ElasticsearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True)
assert len(ds.get_all_documents(index=ds.index)) == 0
assert len(ds.get_all_labels(index=ds.label_index)) == 0
@pytest.mark.integration
def test_eq_filter(self, ds, documents):
ds.write_documents(documents)
filter = {"name": {"$eq": ["name_0"]}}
filtered_docs = ds.get_all_documents(filters=filter)
assert len(filtered_docs) == 3
for doc in filtered_docs:
assert doc.meta["name"] == "name_0"
filter = {"numbers": {"$eq": [2, 4]}}
filtered_docs = ds.query(query=None, filters=filter)
assert len(filtered_docs) == 3
for doc in filtered_docs:
assert doc.meta["month"] == "01"
assert doc.meta["numbers"] == [2, 4]
@pytest.mark.integration
def test_custom_fields(self, ds):
index = "haystack_test_custom"
document_store = ElasticsearchDocumentStore(
index=index,
content_field="custom_text_field",
embedding_field="custom_embedding_field",
recreate_index=True,
)
doc_to_write = {"custom_text_field": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
document_store.write_documents([doc_to_write])
documents = document_store.get_all_documents(return_embedding=True)
assert len(documents) == 1
assert documents[0].content == "test"
np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
document_store.delete_index(index)
@pytest.mark.integration
def test_query_with_filters_and_missing_embeddings(self, ds, documents):
ds.write_documents(documents)
filters = {"month": {"$in": ["01", "03"]}}
ds.skip_missing_embeddings = False
with pytest.raises(ds._RequestError):
ds.query_by_embedding(np.random.rand(768), filters=filters)
ds.skip_missing_embeddings = True
documents = ds.query_by_embedding(np.random.rand(768), filters=filters)
assert len(documents) == 3
@pytest.mark.integration
def test_synonyms(self, ds):
synonyms = ["i-pod, i pod, ipod", "sea biscuit, sea biscit, seabiscuit", "foo, foo bar, baz"]
synonym_type = "synonym_graph"
client = ds.client
index = "haystack_synonym_arg"
client.indices.delete(index=index, ignore=[404])
ElasticsearchDocumentStore(index=index, synonyms=synonyms, synonym_type=synonym_type)
indexed_settings = client.indices.get_settings(index=index)
assert synonym_type == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["type"]
assert synonyms == indexed_settings[index]["settings"]["index"]["analysis"]["filter"]["synonym"]["synonyms"]
@pytest.mark.integration
def test_search_field_mapping(self):
index = "haystack_search_field_mapping"
document_store = ElasticsearchDocumentStore(
index=index, search_fields=["content", "sub_content"], content_field="title"
)
document_store.write_documents(
[
{
"title": "Green tea components",
"meta": {
"content": "The green tea plant contains a range of healthy compounds that make it into the final drink",
"sub_content": "Drink tip",
},
"id": "1",
},
{
"title": "Green tea catechin",
"meta": {
"content": "Green tea contains a catechin called epigallocatechin-3-gallate (EGCG).",
"sub_content": "Ingredients tip",
},
"id": "2",
},
{
"title": "Minerals in Green tea",
"meta": {
"content": "Green tea also has small amounts of minerals that can benefit your health.",
"sub_content": "Minerals tip",
},
"id": "3",
},
{
"title": "Green tea Benefits",
"meta": {
"content": "Green tea does more than just keep you alert, it may also help boost brain function.",
"sub_content": "Health tip",
},
"id": "4",
},
]
)
indexed_settings = document_store.client.indices.get_mapping(index=index)
assert indexed_settings[index]["mappings"]["properties"]["content"]["type"] == "text"
assert indexed_settings[index]["mappings"]["properties"]["sub_content"]["type"] == "text"
document_store.delete_index(index)
@pytest.mark.integration
def test_existing_alias(self, ds):
client = ds.client
client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
client.indices.create(index="haystack_existing_alias_1", body=settings)
client.indices.create(index="haystack_existing_alias_2", body=settings)
client.indices.put_alias(
index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
)
# To be valid, all indices related to the alias must have content field of type text
ElasticsearchDocumentStore(index="haystack_existing_alias", search_fields=["content"])
@pytest.mark.integration
def test_existing_alias_missing_fields(self, ds):
client = ds.client
client.indices.delete(index="haystack_existing_alias_1", ignore=[404])
client.indices.delete(index="haystack_existing_alias_2", ignore=[404])
client.indices.delete_alias(index="_all", name="haystack_existing_alias", ignore=[404])
right_settings = {"mappings": {"properties": {"content": {"type": "text"}}}}
wrong_settings = {"mappings": {"properties": {"content": {"type": "histogram"}}}}
client.indices.create(index="haystack_existing_alias_1", body=right_settings)
client.indices.create(index="haystack_existing_alias_2", body=wrong_settings)
client.indices.put_alias(
index="haystack_existing_alias_1,haystack_existing_alias_2", name="haystack_existing_alias"
)
with pytest.raises(Exception):
# wrong field type for "content" in index "haystack_existing_alias_2"
ElasticsearchDocumentStore(
index="haystack_existing_alias", search_fields=["content"], content_field="title"
)
@pytest.mark.integration
def test_get_document_count_only_documents_without_embedding_arg(self, ds, documents):
ds.write_documents(documents)
assert ds.get_document_count() == 9
assert ds.get_document_count(only_documents_without_embedding=True) == 3
assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["01"]}) == 0
assert ds.get_document_count(only_documents_without_embedding=True, filters={"month": ["03"]}) == 3

View File

@@ -1,3 +1,4 @@
import os
import logging
from unittest.mock import MagicMock, patch
@@ -19,15 +20,16 @@ from haystack.document_stores.opensearch import (
from haystack.schema import Document, Label, Answer
from haystack.errors import DocumentStoreError
# Being all the tests in this module, ideally we wouldn't need a marker here,
# but this is to allow this test suite to be skipped when running (e.g.)
# `pytest test/document_stores --document-store-type=faiss`
class TestOpenSearchDocumentStore:
from .test_base import DocumentStoreBaseTestAbstract
from .test_search_engine import SearchEngineDocumentStoreTestAbstract
class TestOpenSearchDocumentStore(DocumentStoreBaseTestAbstract, SearchEngineDocumentStoreTestAbstract):
# Constants
query_emb = np.random.random_sample(size=(2, 2))
index_name = "myindex"
index_name = __name__
# Fixtures
@@ -36,11 +38,15 @@ class TestOpenSearchDocumentStore:
"""
This fixture provides a working document store and takes care of removing the indices when done
"""
index_name = __name__
labels_index_name = f"{index_name}_labels"
ds = OpenSearchDocumentStore(index=index_name, label_index=labels_index_name, port=9201, create_index=True)
labels_index_name = f"{self.index_name}_labels"
ds = OpenSearchDocumentStore(
index=self.index_name,
label_index=labels_index_name,
host=os.environ.get("OPENSEARCH_HOST", "localhost"),
create_index=True,
)
yield ds
ds.delete_index(index_name)
ds.delete_index(self.index_name)
ds.delete_index(labels_index_name)
@pytest.fixture
@@ -82,35 +88,6 @@ class TestOpenSearchDocumentStore:
"use_system_proxy": True,
}
@pytest.fixture
def documents(self):
documents = []
for i in range(3):
documents.append(
Document(
content=f"A Foo Document {i}",
meta={"name": f"name_{i}", "year": "2020", "month": "01"},
embedding=np.random.rand(768).astype(np.float32),
)
)
documents.append(
Document(
content=f"A Bar Document {i}",
meta={"name": f"name_{i}", "year": "2021", "month": "02"},
embedding=np.random.rand(768).astype(np.float32),
)
)
documents.append(
Document(
content=f"Document {i} without embeddings",
meta={"name": f"name_{i}", "no_embedding": True, "month": "03"},
)
)
return documents
@pytest.fixture
def index(self):
return {
@@ -143,46 +120,15 @@ class TestOpenSearchDocumentStore:
},
}
@pytest.fixture
def labels(self, documents):
labels = []
for i, d in enumerate(documents):
labels.append(
Label(
query="query",
document=d,
is_correct_document=True,
is_correct_answer=False,
# create a mix set of labels
origin="user-feedback" if i % 2 else "gold-label",
answer=None if not i else Answer(f"the answer is {i}"),
)
)
return labels
# Integration tests
@pytest.mark.integration
def test___init__(self):
OpenSearchDocumentStore(index="default_index", port=9201, create_index=True)
OpenSearchDocumentStore(index="default_index", create_index=True)
@pytest.mark.integration
def test___init___faiss(self):
OpenSearchDocumentStore(index="faiss_index", port=9201, create_index=True, knn_engine="faiss")
@pytest.mark.integration
def test_write_documents(self, ds, documents):
ds.write_documents(documents)
docs = ds.get_all_documents()
assert len(docs) == len(documents)
for i, doc in enumerate(docs):
expected = documents[i]
assert doc.id == expected.id
@pytest.mark.integration
def test_write_labels(self, ds, labels):
ds.write_labels(labels)
assert ds.get_all_labels() == labels
OpenSearchDocumentStore(index="faiss_index", create_index=True, knn_engine="faiss")
@pytest.mark.integration
def test_recreate_index(self, ds, documents, labels):
@@ -190,7 +136,7 @@ class TestOpenSearchDocumentStore:
ds.write_labels(labels)
# Create another document store on top of the previous one
ds = OpenSearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True, port=9201)
ds = OpenSearchDocumentStore(index=ds.index, label_index=ds.label_index, recreate_index=True)
assert len(ds.get_all_documents(index=ds.index)) == 0
assert len(ds.get_all_labels(index=ds.label_index)) == 0
@@ -213,7 +159,7 @@ class TestOpenSearchDocumentStore:
assert ds.embeddings_field_supports_similarity == True
index_name = ds.index
with caplog.at_level(logging.WARNING):
ds = OpenSearchDocumentStore(port=9201, knn_engine="faiss", index=index_name)
ds = OpenSearchDocumentStore(knn_engine="faiss", index=index_name)
warning = (
"Embedding field 'embedding' was initially created with knn_engine 'nmslib', but knn_engine was "
"set to 'faiss' when initializing OpenSearchDocumentStore. Falling back to slow exact vector "

View File

@@ -0,0 +1,58 @@
import pytest
from haystack.document_stores.search_engine import SearchEngineDocumentStore, prepare_hosts
@pytest.mark.unit
def test_prepare_hosts():
pass
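# NOTE: this and several methods below are placeholders (`pass`), presumably to be
# filled in as more search-engine specific tests are ported to the new hierarchy.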
@pytest.mark.document_store
class SearchEngineDocumentStoreTestAbstract:
"""
This is the base class for any search-engine Document Store test suite. It doesn't have the `Test` prefix
in the name because we want its methods to run only in subclasses.
"""
@pytest.mark.integration
def test___do_bulk(self):
pass
@pytest.mark.integration
def test___do_scan(self):
pass
@pytest.mark.integration
def test_query_by_embedding(self):
pass
@pytest.mark.integration
def test_get_meta_values_by_key(self, ds, documents):
ds.write_documents(documents)
# test without filters or query
result = ds.get_metadata_values_by_key(key="name")
assert result == [
{"count": 3, "value": "name_0"},
{"count": 3, "value": "name_1"},
{"count": 3, "value": "name_2"},
]
# test with filters but no query
result = ds.get_metadata_values_by_key(key="year", filters={"month": ["01"]})
assert result == [{"count": 3, "value": "2020"}]
# test with filters & query
result = ds.get_metadata_values_by_key(key="year", query="Bar")
assert result == [{"count": 3, "value": "2021"}]
@pytest.mark.document_store
class TestSearchEngineDocumentStore:
"""
This class tests the concrete methods in SearchEngineDocumentStore
"""
@pytest.mark.integration
def test__split_document_list(self):
pass