Mirror of https://github.com/deepset-ai/haystack.git (synced 2025-11-02 10:49:30 +00:00)
refactor: refactor FAISS tests (#3537)
* fix write docs behaviour
* refactor FAISS tests
* do not remove the sqlite db
* try
* remove extra slash
* Apply suggestions from code review
* review comments
* Update test/document_stores/test_faiss.py
* review comments

Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
parent 9539a209ae
commit 3319ef6d1c
.github/workflows/tests.yml (vendored): 98 lines changed
@@ -295,10 +295,41 @@ jobs:
         status: ${{ job.status }}
         channel: '#haystack'
       if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

+  integration-tests-faiss:
+    name: Integration / faiss / ${{ matrix.os }}
+    needs:
+      - unit-tests
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest,macos-latest,windows-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup Python
+        uses: ./.github/actions/python_cache/
+
+      - name: Install Haystack
+        run: pip install -U .
+
+      - name: Run tests
+        run: |
+          pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_faiss.py
+
+      - uses: act10ns/slack@v1
+        with:
+          status: ${{ job.status }}
+          channel: '#haystack'
+        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

   #
   # TODO: the following steps need to be revisited
   #

   unit-tests-linux:
     needs: [mypy, pylint, black]
     strategy:
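Note that the new job selects tests purely by pytest markers, where the removed jobs below relied on a --document_store_type flag. A minimal sketch of how such marker-based selection works; the conftest.py registration shown here is an assumption for illustration, the diff itself only shows the command line that consumes the markers:

# conftest.py -- hypothetical marker registration so pytest doesn't warn about unknown markers
def pytest_configure(config):
    config.addinivalue_line("markers", "document_store: document store test")
    config.addinivalue_line("markers", "integration: integration test")

# test_example.py -- a test that `pytest -m "document_store and integration"` would select
import pytest

@pytest.mark.document_store
@pytest.mark.integration
def test_example():
    assert True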
@@ -392,73 +423,6 @@ jobs:
         channel: '#haystack'
       if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

-  faiss-tests-linux:
-    needs:
-      - mypy
-      - pylint
-    runs-on: ubuntu-latest
-    if: contains(github.event.pull_request.labels.*.name, 'topic:faiss') || !github.event.pull_request.draft
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Setup Python
-        uses: ./.github/actions/python_cache/
-
-      # TODO Let's try to remove this one from the unit tests
-      - name: Install pdftotext
-        run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin
-
-      - name: Install Haystack
-        run: pip install .[faiss]
-
-      - name: Run tests
-        env:
-          TOKENIZERS_PARALLELISM: 'false'
-        run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" test/document_stores/ --document_store_type=faiss
-
-      - uses: act10ns/slack@v1
-        with:
-          status: ${{ job.status }}
-          channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
-
-  faiss-tests-windows:
-    needs:
-      - mypy
-      - pylint
-    runs-on: windows-latest
-    if: contains(github.event.pull_request.labels.*.name, 'topic:faiss') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Setup Python
-        uses: ./.github/actions/python_cache/
-        with:
-          prefix: windows
-
-      - name: Install pdftotext
-        run: |
-          choco install xpdf-utils
-          choco install openjdk11
-          refreshenv
-      - name: Install Haystack
-        run: pip install .[faiss]
-
-      - name: Run tests
-        env:
-          TOKENIZERS_PARALLELISM: 'false'
-        run: |
-          pytest ${{ env.PYTEST_PARAMS }} -m "faiss and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=faiss
-
-      - uses: act10ns/slack@v1
-        with:
-          status: ${{ job.status }}
-          channel: '#haystack'
-        if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

   milvus-tests-linux:
     needs: [mypy, pylint, black]
     runs-on: ubuntu-latest
haystack/document_stores/faiss.py

@@ -258,7 +258,7 @@ class FAISSDocumentStore(SQLDocumentStore):
             documents=document_objects, index=index, duplicate_documents=duplicate_documents
         )
         if len(document_objects) > 0:
-            add_vectors = False if document_objects[0].embedding is None else True
+            add_vectors = all(doc.embedding is not None for doc in document_objects)

            if self.duplicate_documents == "overwrite" and add_vectors:
                logger.warning(
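This hunk is the "fix write docs behaviour" from the commit message: the old check inspected only the first document, so a batch mixing documents with and without embeddings was treated as fully embedded. A minimal standalone sketch of the difference, using a throwaway Doc class rather than Haystack's actual schema:

import numpy as np

class Doc:
    def __init__(self, embedding=None):
        self.embedding = embedding

docs = [Doc(embedding=np.ones(4, dtype=np.float32)), Doc()]  # second doc has no embedding

old_add_vectors = False if docs[0].embedding is None else True    # True: wrongly trusts doc 0 alone
new_add_vectors = all(doc.embedding is not None for doc in docs)  # False: requires every doc to have a vector

assert old_add_vectors is True and new_add_vectors is False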
@@ -494,7 +494,7 @@ class FAISSDocumentStore(SQLDocumentStore):
             raise NotImplementedError("FAISSDocumentStore does not support headers.")

         logger.warning(
-            """DEPRECATION WARNINGS:
+            """DEPRECATION WARNINGS:
                 1. delete_all_documents() method is deprecated, please use delete_documents method
                 For more details, please refer to the issue: https://github.com/deepset-ai/haystack/issues/1045
                 """
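Since this hunk touches the deprecation notice, a short sketch of the migration it points to. The default index name "document" and the zero-argument constructor are assumptions here, not shown in this diff:

from haystack.document_stores import FAISSDocumentStore

document_store = FAISSDocumentStore()

# Deprecated, per the warning above:
# document_store.delete_all_documents(index="document")

# Preferred replacement:
document_store.delete_documents(index="document")  # deletes everything in the given index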
test/document_stores/test_faiss.py

@@ -1,4 +1,5 @@
 import sys
 import os

 import yaml
 import faiss
@@ -8,311 +9,248 @@ import numpy as np
 from haystack.schema import Document
 from haystack.document_stores.faiss import FAISSDocumentStore

+from .test_base import DocumentStoreBaseTestAbstract

 from haystack.pipelines import Pipeline
 from haystack.nodes.retriever.dense import EmbeddingRetriever

 from ..conftest import MockDenseRetriever


-DOCUMENTS = [
-    {
-        "meta": {"name": "name_1", "year": "2020", "month": "01"},
-        "content": "text_1",
-        "embedding": np.random.rand(768).astype(np.float32),
-    },
-    {
-        "meta": {"name": "name_2", "year": "2020", "month": "02"},
-        "content": "text_2",
-        "embedding": np.random.rand(768).astype(np.float32),
-    },
-    {
-        "meta": {"name": "name_3", "year": "2020", "month": "03"},
-        "content": "text_3",
-        "embedding": np.random.rand(768).astype(np.float64),
-    },
-    {
-        "meta": {"name": "name_4", "year": "2021", "month": "01"},
-        "content": "text_4",
-        "embedding": np.random.rand(768).astype(np.float32),
-    },
-    {
-        "meta": {"name": "name_5", "year": "2021", "month": "02"},
-        "content": "text_5",
-        "embedding": np.random.rand(768).astype(np.float32),
-    },
-    {
-        "meta": {"name": "name_6", "year": "2021", "month": "03"},
-        "content": "text_6",
-        "embedding": np.random.rand(768).astype(np.float64),
-    },
-]
-
-
-@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Test with tmp_path not working on windows runner")
-def test_faiss_index_save_and_load(tmp_path, sql_url):
-    document_store = FAISSDocumentStore(
-        sql_url=sql_url,
-        index="haystack_test",
-        progress_bar=False,  # Just to check if the init parameters are kept
-        isolation_level="AUTOCOMMIT",
-    )
-    document_store.write_documents(DOCUMENTS)
-
-    # test saving the index
-    document_store.save(tmp_path / "haystack_test_faiss")
-
-    # clear existing faiss_index
-    document_store.faiss_indexes[document_store.index].reset()
-
-    # test faiss index is cleared
-    assert document_store.faiss_indexes[document_store.index].ntotal == 0
-
-    # test loading the index
-    new_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss")
-
-    # check faiss index is restored
-    assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not new_document_store.progress_bar
-
-    # test saving and loading the loaded faiss index
-    new_document_store.save(tmp_path / "haystack_test_faiss")
-    reloaded_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss")
-
-    # check faiss index is restored
-    assert reloaded_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(reloaded_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not reloaded_document_store.progress_bar
-
-    # test loading the index via init
-    new_document_store = FAISSDocumentStore(faiss_index_path=tmp_path / "haystack_test_faiss")
-
-    # check faiss index is restored
-    assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not new_document_store.progress_bar
-
-
-@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Test with tmp_path not working on windows runner")
-def test_faiss_index_save_and_load_custom_path(tmp_path, sql_url):
-    document_store = FAISSDocumentStore(
-        sql_url=sql_url,
-        index="haystack_test",
-        progress_bar=False,  # Just to check if the init parameters are kept
-        isolation_level="AUTOCOMMIT",
-    )
-    document_store.write_documents(DOCUMENTS)
-
-    # test saving the index
-    document_store.save(index_path=tmp_path / "haystack_test_faiss", config_path=tmp_path / "custom_path.json")
-
-    # clear existing faiss_index
-    document_store.faiss_indexes[document_store.index].reset()
-
-    # test faiss index is cleared
-    assert document_store.faiss_indexes[document_store.index].ntotal == 0
-
-    # test loading the index
-    new_document_store = FAISSDocumentStore.load(
-        index_path=tmp_path / "haystack_test_faiss", config_path=tmp_path / "custom_path.json"
-    )
-
-    # check faiss index is restored
-    assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not new_document_store.progress_bar
-
-    # test saving and loading the loaded faiss index
-    new_document_store.save(tmp_path / "haystack_test_faiss", config_path=tmp_path / "custom_path.json")
-    reloaded_document_store = FAISSDocumentStore.load(
-        tmp_path / "haystack_test_faiss", config_path=tmp_path / "custom_path.json"
-    )
-
-    # check faiss index is restored
-    assert reloaded_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(reloaded_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not reloaded_document_store.progress_bar
-
-    # test loading the index via init
-    new_document_store = FAISSDocumentStore(
-        faiss_index_path=tmp_path / "haystack_test_faiss", faiss_config_path=tmp_path / "custom_path.json"
-    )
-
-    # check faiss index is restored
-    assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS)
-    # check if documents are restored
-    assert len(new_document_store.get_all_documents()) == len(DOCUMENTS)
-    # Check if the init parameters are kept
-    assert not new_document_store.progress_bar
-
-
-@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Test with tmp_path not working on windows runner")
-def test_faiss_index_mutual_exclusive_args(tmp_path):
-    with pytest.raises(ValueError):
-        FAISSDocumentStore(
-            sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}",
-            faiss_index_path=f"{tmp_path/'haystack_test'}",
-        )
-
-    with pytest.raises(ValueError):
-        FAISSDocumentStore(
-            f"sqlite:////{tmp_path/'haystack_test.db'}",
-            faiss_index_path=f"{tmp_path/'haystack_test'}",
-            isolation_level="AUTOCOMMIT",
-        )
+class TestFAISSDocumentStore(DocumentStoreBaseTestAbstract):
+    @pytest.fixture
+    def ds(self, tmp_path):
+        return FAISSDocumentStore(
+            sql_url=f"sqlite:///{tmp_path}/haystack_test.db",
+            return_embedding=True,
+            isolation_level="AUTOCOMMIT",
+            progress_bar=False,
+            similarity="cosine",
+        )
+
+    @pytest.fixture
+    def documents_with_embeddings(self, documents):
+        # drop documents without embeddings from the original fixture
+        return [d for d in documents if d.embedding is not None]
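The fixtures above are what drive the refactor: DocumentStoreBaseTestAbstract declares the generic document store tests once, and each concrete suite supplies its store through the ds fixture. A minimal sketch of that pytest pattern; the class and method names here are illustrative, not the real base class:

import pytest

class _BaseSuite:                      # no "Test" prefix, so pytest does not collect it directly
    @pytest.fixture
    def ds(self):
        raise NotImplementedError      # each concrete suite must override this fixture

    def test_starts_empty(self, ds):
        assert ds == []                # inherited test runs against each subclass's own ds

class TestListStore(_BaseSuite):
    @pytest.fixture
    def ds(self):
        return []                      # the override that pytest injects into the inherited tests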
+    @pytest.mark.unit
+    def test_index_mutual_exclusive_args(self, tmp_path):
+        with pytest.raises(ValueError, match="faiss_index_path"):
+            FAISSDocumentStore(
+                sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}",
+                faiss_index_path=f"{tmp_path/'haystack_test'}",
+                isolation_level="AUTOCOMMIT",
+            )
+
+        with pytest.raises(ValueError, match="faiss_index_path"):
+            FAISSDocumentStore(
+                f"sqlite:////{tmp_path/'haystack_test.db'}",
+                faiss_index_path=f"{tmp_path/'haystack_test'}",
+                isolation_level="AUTOCOMMIT",
+            )
+
+    @pytest.mark.integration
+    def test_delete_index(self, ds, documents):
+        """Contrary to other Document Stores, FAISSDocumentStore doesn't raise if the index is empty"""
+        ds.write_documents(documents)
+        assert ds.get_document_count() == len(documents)
+        ds.delete_index(ds.index)
+        assert ds.get_document_count() == 0
+
+    @pytest.mark.integration
+    @pytest.mark.parametrize("config_path", [None, "custom_path.json"])
+    def test_index_save_and_load(self, ds, documents_with_embeddings, tmp_path, config_path):
+        if config_path:
+            config_path = tmp_path / config_path
+
+        ds.write_documents(documents_with_embeddings)
+
+        # test saving the index
+        ds.save(index_path=tmp_path / "haystack_test_faiss", config_path=config_path)
+
+        # clear existing faiss_index
+        ds.faiss_indexes[ds.index].reset()
+
+        # test faiss index is cleared
+        assert ds.faiss_indexes[ds.index].ntotal == 0
+
+        # test loading the index
+        new_document_store = FAISSDocumentStore.load(
+            index_path=tmp_path / "haystack_test_faiss", config_path=config_path
+        )
+
+        # check faiss index is restored
+        assert new_document_store.faiss_indexes[ds.index].ntotal == len(documents_with_embeddings)
+        # check if documents are restored
+        assert len(new_document_store.get_all_documents()) == len(documents_with_embeddings)
+        # Check if the init parameters are kept
+        assert not new_document_store.progress_bar
+
+        # test saving and loading the loaded faiss index
+        new_document_store.save(tmp_path / "haystack_test_faiss", config_path=config_path)
+        reloaded_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss", config_path=config_path)
+
+        # check faiss index is restored
+        assert reloaded_document_store.faiss_indexes[ds.index].ntotal == len(documents_with_embeddings)
+        # check if documents are restored
+        assert len(reloaded_document_store.get_all_documents()) == len(documents_with_embeddings)
+        # Check if the init parameters are kept
+        assert not reloaded_document_store.progress_bar
+
+        # test loading the index via init
+        new_document_store = FAISSDocumentStore(
+            faiss_index_path=tmp_path / "haystack_test_faiss", faiss_config_path=config_path
+        )
+
+        # check faiss index is restored
+        assert new_document_store.faiss_indexes[ds.index].ntotal == len(documents_with_embeddings)
+        # check if documents are restored
+        assert len(new_document_store.get_all_documents()) == len(documents_with_embeddings)
+        # Check if the init parameters are kept
+        assert not new_document_store.progress_bar

-@pytest.mark.parametrize("document_store", ["faiss"], indirect=True)
-@pytest.mark.parametrize("index_buffer_size", [10_000, 2])
-@pytest.mark.parametrize("batch_size", [2])
-def test_faiss_write_docs(document_store, index_buffer_size, batch_size):
-    document_store.index_buffer_size = index_buffer_size
-
-    # Write in small batches
-    for i in range(0, len(DOCUMENTS), batch_size):
-        document_store.write_documents(DOCUMENTS[i : i + batch_size])
-
-    documents_indexed = document_store.get_all_documents()
-    assert len(documents_indexed) == len(DOCUMENTS)
-
-    # test if correct vectors are associated with docs
-    for i, doc in enumerate(documents_indexed):
-        # we currently don't get the embeddings back when we call document_store.get_all_documents()
-        original_doc = [d for d in DOCUMENTS if d["content"] == doc.content][0]
-        stored_emb = document_store.faiss_indexes[document_store.index].reconstruct(int(doc.meta["vector_id"]))
-        # compare original input vec with stored one (ignore extra dim added by hnsw)
-        # original input vec is normalized as faiss only stores normalized vectors
-        assert np.allclose(original_doc["embedding"] / np.linalg.norm(original_doc["embedding"]), stored_emb, rtol=0.01)
+    @pytest.mark.integration
+    @pytest.mark.parametrize("index_buffer_size", [10_000, 2])
+    @pytest.mark.parametrize("index_factory", ["Flat", "HNSW", "IVF1,Flat"])
+    def test_write_index_docs(self, documents_with_embeddings, tmp_path, index_buffer_size, index_factory):
+        document_store = FAISSDocumentStore(
+            sql_url=f"sqlite:///{tmp_path}/test_faiss_retrieving_{index_factory}.db",
+            faiss_index_factory_str=index_factory,
+            isolation_level="AUTOCOMMIT",
+            return_embedding=True,
+        )
+        batch_size = 2
+        document_store.index_buffer_size = index_buffer_size
+        document_store.delete_all_documents(index=document_store.index)
+        if "ivf" in index_factory.lower():
+            document_store.train_index(documents_with_embeddings)
+            document_store.faiss_indexes[document_store.index].make_direct_map()
+
+        # Write in batches
+        for i in range(0, len(documents_with_embeddings), batch_size):
+            document_store.write_documents(documents_with_embeddings[i : i + batch_size])
+
+        documents_indexed = document_store.get_all_documents()
+        assert len(documents_indexed) == len(documents_with_embeddings)
+        assert all(doc.embedding is not None for doc in documents_indexed)

-@pytest.mark.parametrize("document_store", ["faiss"], indirect=True)
-def test_faiss_write_docs_different_indexes(document_store):
-    document_store.write_documents(DOCUMENTS, index="index1")
-    document_store.write_documents(DOCUMENTS, index="index2")
-
-    docs_from_index1 = document_store.get_all_documents(index="index1", return_embedding=False)
-    assert len(docs_from_index1) == len(DOCUMENTS)
-    assert {int(doc.meta["vector_id"]) for doc in docs_from_index1} == set(range(0, 6))
-
-    docs_from_index2 = document_store.get_all_documents(index="index2", return_embedding=False)
-    assert len(docs_from_index2) == len(DOCUMENTS)
-    assert {int(doc.meta["vector_id"]) for doc in docs_from_index2} == set(range(0, 6))
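The index_factory parametrization above includes "IVF1,Flat", which is why the test calls train_index before writing: FAISS IVF indexes must learn their coarse quantizer centroids before any vectors can be added. A small standalone sketch against the faiss library itself:

import faiss
import numpy as np

d = 768
xb = np.random.rand(32, d).astype(np.float32)

index = faiss.index_factory(d, "IVF1,Flat")
assert not index.is_trained   # IVF indexes start untrained
index.train(xb)               # learn the coarse centroids
assert index.is_trained
index.add(xb)                 # only now can vectors be added
assert index.ntotal == len(xb)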
+    @pytest.mark.integration
+    def test_write_docs_different_indexes(self, ds, documents_with_embeddings):
+        docs_a = documents_with_embeddings[:2]
+        docs_b = documents_with_embeddings[2:]
+        ds.write_documents(docs_a, index="index_a")
+        ds.write_documents(docs_b, index="index_b")
+
+        docs_from_index_a = ds.get_all_documents(index="index_a", return_embedding=False)
+        assert len(docs_from_index_a) == len(docs_a)
+        assert {int(doc.meta["vector_id"]) for doc in docs_from_index_a} == {0, 1}
+
+        docs_from_index_b = ds.get_all_documents(index="index_b", return_embedding=False)
+        assert len(docs_from_index_b) == len(docs_b)
+        assert {int(doc.meta["vector_id"]) for doc in docs_from_index_b} == {0, 1, 2, 3}

-@pytest.mark.parametrize("document_store", ["faiss"], indirect=True)
-def test_faiss_update_docs_different_indexes(document_store):
-    retriever = MockDenseRetriever(document_store=document_store)
-
-    document_store.write_documents(DOCUMENTS, index="index1")
-    document_store.write_documents(DOCUMENTS, index="index2")
-
-    document_store.update_embeddings(retriever=retriever, update_existing_embeddings=True, index="index1")
-    document_store.update_embeddings(retriever=retriever, update_existing_embeddings=True, index="index2")
-
-    docs_from_index1 = document_store.get_all_documents(index="index1", return_embedding=False)
-    assert len(docs_from_index1) == len(DOCUMENTS)
-    assert {int(doc.meta["vector_id"]) for doc in docs_from_index1} == set(range(0, 6))
-
-    docs_from_index2 = document_store.get_all_documents(index="index2", return_embedding=False)
-    assert len(docs_from_index2) == len(DOCUMENTS)
-    assert {int(doc.meta["vector_id"]) for doc in docs_from_index2} == set(range(0, 6))
+    @pytest.mark.integration
+    def test_update_docs_different_indexes(self, ds, documents_with_embeddings):
+        retriever = MockDenseRetriever(document_store=ds)
+
+        docs_a = documents_with_embeddings[:2]
+        docs_b = documents_with_embeddings[2:]
+        ds.write_documents(docs_a, index="index_a")
+        ds.write_documents(docs_b, index="index_b")
+
+        ds.update_embeddings(retriever=retriever, update_existing_embeddings=True, index="index_a")
+        ds.update_embeddings(retriever=retriever, update_existing_embeddings=True, index="index_b")
+
+        docs_from_index_a = ds.get_all_documents(index="index_a", return_embedding=False)
+        assert len(docs_from_index_a) == len(docs_a)
+        assert {int(doc.meta["vector_id"]) for doc in docs_from_index_a} == {0, 1}
+
+        docs_from_index_b = ds.get_all_documents(index="index_b", return_embedding=False)
+        assert len(docs_from_index_b) == len(docs_b)
+        assert {int(doc.meta["vector_id"]) for doc in docs_from_index_b} == {0, 1, 2, 3}
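For context on the tests above: update_embeddings asks the retriever to embed every stored document and writes the resulting vectors into the FAISS index, so MockDenseRetriever (defined in the suite's conftest, not shown in this diff) only needs to return deterministic vectors of the right shape. A rough stand-in sketch, assuming only numpy; the class name and method are illustrative, not the real mock:

import numpy as np

class FakeDenseRetriever:
    """Illustrative stand-in: returns one fixed-dimension vector per document."""
    def __init__(self, embedding_dim=768, seed=42):
        self.rng = np.random.default_rng(seed)
        self.embedding_dim = embedding_dim

    def embed_documents(self, docs):
        # one float32 row per document, as a dense retriever would produce
        return self.rng.random((len(docs), self.embedding_dim), dtype=np.float32)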
-@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Test with tmp_path not working on windows runner")
-@pytest.mark.parametrize("index_factory", ["Flat", "HNSW", "IVF1,Flat"])
-def test_faiss_retrieving(index_factory, tmp_path):
-    document_store = FAISSDocumentStore(
-        sql_url=f"sqlite:////{tmp_path/'test_faiss_retrieving.db'}",
-        faiss_index_factory_str=index_factory,
-        isolation_level="AUTOCOMMIT",
-    )
-
-    document_store.delete_all_documents(index="document")
-    if "ivf" in index_factory.lower():
-        document_store.train_index(DOCUMENTS)
-    document_store.write_documents(DOCUMENTS)
-
-    retriever = EmbeddingRetriever(
-        document_store=document_store, embedding_model="deepset/sentence_bert", use_gpu=False
-    )
-    result = retriever.retrieve(query="How to test this?")
-
-    assert len(result) == len(DOCUMENTS)
-    assert type(result[0]) == Document
-
-    # Cleanup
-    document_store.faiss_indexes[document_store.index].reset()
+    @pytest.mark.integration
+    def test_passing_index_from_outside(self, documents_with_embeddings, tmp_path):
+        d = 768
+        nlist = 2
+        quantizer = faiss.IndexFlatIP(d)
+        index = "haystack_test_1"
+        faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT)
+        faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable)
+        faiss_index.nprobe = 2
+        document_store = FAISSDocumentStore(
+            sql_url=f"sqlite:///", faiss_index=faiss_index, index=index, isolation_level="AUTOCOMMIT"
+        )
+
+        document_store.delete_documents()
+        # as it is a IVF index we need to train it before adding docs
+        document_store.train_index(documents_with_embeddings)
+
+        document_store.write_documents(documents=documents_with_embeddings)
+        documents_indexed = document_store.get_all_documents()
+
+        # test if vectors ids are associated with docs
+        for doc in documents_indexed:
+            assert 0 <= int(doc.meta["vector_id"]) <= 7
+    @pytest.mark.integration
+    def test_pipeline_with_existing_faiss_docstore(self, ds, documents_with_embeddings, tmp_path):
+        ds.write_documents(documents_with_embeddings)
+        ds.save(tmp_path / "existing_faiss_document_store")
+        pipeline_config = {
+            "version": "ignore",
+            "components": [
+                {
+                    "name": "DPRRetriever",
+                    "type": "MockDenseRetriever",
+                    "params": {"document_store": "ExistingFAISSDocumentStore"},
+                },
+                {
+                    "name": "ExistingFAISSDocumentStore",
+                    "type": "FAISSDocumentStore",
+                    "params": {"faiss_index_path": f"{tmp_path / 'existing_faiss_document_store'}"},
+                },
+            ],
+            "pipelines": [{"name": "query_pipeline", "nodes": [{"name": "DPRRetriever", "inputs": ["Query"]}]}],
+        }
+        pipeline = Pipeline.load_from_config(pipeline_config)
+        existing_document_store = pipeline.get_document_store()
+        faiss_index = existing_document_store.faiss_indexes[ds.index]
+        assert faiss_index.ntotal == len(documents_with_embeddings)
+
+    # See TestSQLDocumentStore about why we have to skip these tests
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_ne_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nin_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_comparison_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nested_condition_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_nested_condition_not_filters(self, ds, documents):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_delete_labels_by_filter(self, ds, labels):
+        pass
+
+    @pytest.mark.skip
+    @pytest.mark.integration
+    def test_delete_labels_by_filter_id(self, ds, labels):
+        pass

-@pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Test with tmp_path not working on windows runner")
-def test_faiss_passing_index_from_outside(tmp_path):
-    d = 768
-    nlist = 2
-    quantizer = faiss.IndexFlatIP(d)
-    index = "haystack_test_1"
-    faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_INNER_PRODUCT)
-    faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable)
-    faiss_index.nprobe = 2
-    document_store = FAISSDocumentStore(
-        sql_url=f"sqlite:////{tmp_path/'haystack_test_faiss.db'}",
-        faiss_index=faiss_index,
-        index=index,
-        isolation_level="AUTOCOMMIT",
-    )
-
-    document_store.delete_documents()
-    # as it is a IVF index we need to train it before adding docs
-    document_store.train_index(DOCUMENTS)
-
-    document_store.write_documents(documents=DOCUMENTS)
-    documents_indexed = document_store.get_all_documents()
-
-    # test if vectors ids are associated with docs
-    for doc in documents_indexed:
-        assert 0 <= int(doc.meta["vector_id"]) <= 7
-
-
-@pytest.mark.integration
-def test_pipeline_with_existing_faiss_docstore(tmp_path):
-    document_store: FAISSDocumentStore = FAISSDocumentStore(
-        sql_url=f'sqlite:///{(tmp_path / "faiss_document_store.db").absolute()}'
-    )
-    retriever = MockDenseRetriever(document_store=document_store)
-    document_store.write_documents(DOCUMENTS)
-    document_store.update_embeddings(retriever=retriever, update_existing_embeddings=True)
-
-    document_store.save(tmp_path / "existing_faiss_document_store")
-
-    query_config = f"""
-version: ignore
-components:
-  - name: DPRRetriever
-    type: MockDenseRetriever
-    params:
-      document_store: ExistingFAISSDocumentStore
-  - name: ExistingFAISSDocumentStore
-    type: FAISSDocumentStore
-    params:
-      faiss_index_path: '{tmp_path / "existing_faiss_document_store"}'
-pipelines:
-  - name: query_pipeline
-    nodes:
-      - name: DPRRetriever
-        inputs: [Query]
-"""
-    pipeline = Pipeline.load_from_config(yaml.safe_load(query_config))
-    existing_document_store = pipeline.get_document_store()
-    faiss_index = existing_document_store.faiss_indexes["document"]
-    assert faiss_index.ntotal == len(DOCUMENTS)
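One detail worth noting in the pipeline tests: the removed module-level test built its config as a YAML string and parsed it with yaml.safe_load, while the new method passes an equivalent Python dict straight to Pipeline.load_from_config. A minimal sketch of that equivalence; the YAML string below is illustrative, not taken from the test suite:

import yaml

yaml_text = """
version: ignore
components:
  - name: MyStore
    type: FAISSDocumentStore
pipelines: []
"""

# yaml.safe_load yields the same nested dict the new test writes by hand
config_from_yaml = yaml.safe_load(yaml_text)
assert config_from_yaml["components"][0]["name"] == "MyStore"
# Pipeline.load_from_config(config_from_yaml)  # both forms feed the same loader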